check

2025-10-22 02:08:58 +08:00 · 2024-04-25 17:23:29 +02:00
2697 changed files with 60458 additions and 157595 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -12,7 +12,7 @@ jobs:
    # Ensure running with CircleCI/huggingface
    check_circleci_user:
        docker:
-            - image: python:3.10-slim
+            - image: cimg/python:3.8.12
        parallelism: 1
        steps:
            - run: echo $CIRCLE_PROJECT_USERNAME
@ -26,63 +26,87 @@ jobs:
    fetch_tests:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
+            - image: cimg/python:3.8.12
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -U -e .
-            - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
+            - run: pip install --upgrade --upgrade-strategy eager pip
+            - run: pip install -U --upgrade-strategy eager GitPython
+            - run: pip install -U --upgrade-strategy eager .
            - run: mkdir -p test_preparation
            - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
-            - run: python utils/tests_fetcher.py --filter_tests
-            - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
+            - store_artifacts:
+                  path: ~/transformers/tests_fetched_summary.txt
            - run: |
-                if [ ! -s test_preparation/generated_config.yml ]; then
-                    echo "No tests to run, exiting early!"
-                    circleci-agent step halt
+                if [ -f test_list.txt ]; then
+                    cp test_list.txt test_preparation/test_list.txt
+                else
+                    touch test_preparation/test_list.txt
+                fi
+            - run: |
+                  if [ -f examples_test_list.txt ]; then
+                      mv examples_test_list.txt test_preparation/examples_test_list.txt
+                  else
+                      touch test_preparation/examples_test_list.txt
+                  fi
+            - run: |
+                  if [ -f filtered_test_list_cross_tests.txt ]; then
+                      mv filtered_test_list_cross_tests.txt test_preparation/filtered_test_list_cross_tests.txt
+                  else
+                      touch test_preparation/filtered_test_list_cross_tests.txt
+                  fi
+            - run: |
+                if [ -f doctest_list.txt ]; then
+                    cp doctest_list.txt test_preparation/doctest_list.txt
+                else
+                    touch test_preparation/doctest_list.txt
+                fi
+            - run: |
+                if [ -f test_repo_utils.txt ]; then
+                    mv test_repo_utils.txt test_preparation/test_repo_utils.txt
+                else
+                    touch test_preparation/test_repo_utils.txt
+                fi
+            - run: python utils/tests_fetcher.py --filter_tests
+            - run: |
+                if [ -f test_list.txt ]; then
+                    mv test_list.txt test_preparation/filtered_test_list.txt
+                else
+                    touch test_preparation/filtered_test_list.txt
                fi
-
            - store_artifacts:
-                path: test_preparation
-
-            - run:
-                name: "Retrieve Artifact Paths"
-                env:
-                    CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
-                command: |
-                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
-                    job_number=${CIRCLE_BUILD_NUM}
-                    url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
-                    curl -o  test_preparation/artifacts.json ${url}
-            - run:
-                name: "Prepare pipeline parameters"
-                command: |
-                    python utils/process_test_artifacts.py 
-            
-            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
-            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
-            # We used:
-
-            # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
-            # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-                
+                  path: test_preparation/test_list.txt
            - store_artifacts:
-                path: test_preparation/transformed_artifacts.json
+                  path: test_preparation/doctest_list.txt
            - store_artifacts:
-                path: test_preparation/artifacts.json
+                  path: ~/transformers/test_preparation/filtered_test_list.txt
+            - store_artifacts:
+                  path: test_preparation/examples_test_list.txt
+            - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
+            - run: |
+                  if [ ! -s test_preparation/generated_config.yml ]; then
+                      echo "No tests to run, exiting early!"
+                      circleci-agent step halt
+                  fi
+            - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
+            - store_artifacts:
+                  path: test_preparation/generated_config.txt
+            - store_artifacts:
+                  path: test_preparation/filtered_test_list_cross_tests.txt
            - continuation/continue:
-                parameters:  test_preparation/transformed_artifacts.json
-                configuration_path: test_preparation/generated_config.yml
+                  configuration_path: test_preparation/generated_config.yml

    # To run all tests for the nightly build
    fetch_all_tests:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
+            - image: cimg/python:3.8.12
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -e .
+            - run: pip install --upgrade --upgrade-strategy eager pip
+            - run: pip install -U --upgrade-strategy eager GitPython
+            - run: pip install -U --upgrade-strategy eager .
            - run: |
                  mkdir test_preparation
                  echo -n "tests" > test_preparation/test_list.txt
@ -102,7 +126,7 @@ jobs:
    check_code_quality:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-quality
+            - image: cimg/python:3.8.12
        resource_class: large
        environment:
            TRANSFORMERS_IS_CI: yes
@ -110,7 +134,24 @@ jobs:
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -e .
+            - restore_cache:
+                  keys:
+                      - v0.7-code_quality-pip-{{ checksum "setup.py" }}
+                      - v0.7-code_quality-pip  # prefix fallback: must share the prefix of the key saved below
+            - restore_cache:
+                  keys:
+                      - v0.7-code_quality-site-packages-{{ checksum "setup.py" }}
+                      - v0.7-code_quality-site-packages  # prefix fallback: must share the prefix of the key saved below
+            - run: pip install --upgrade --upgrade-strategy eager pip
+            - run: pip install -U --upgrade-strategy eager .[all,quality]
+            - save_cache:
+                  key: v0.7-code_quality-pip-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - save_cache:
+                  key: v0.7-code_quality-site-packages-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.pyenv/versions/'
            - run:
                name: Show installed libraries and their versions
                command: pip freeze | tee installed.txt
@ -122,12 +163,11 @@ jobs:
            - run: python utils/custom_init_isort.py --check_only
            - run: python utils/sort_auto_mappings.py --check_only
            - run: python utils/check_doc_toc.py
-            - run: python utils/check_docstrings.py --check_all

    check_repository_consistency:
        working_directory: ~/transformers
        docker:
-            - image: huggingface/transformers-consistency
+            - image: cimg/python:3.8.12
        resource_class: large
        environment:
            TRANSFORMERS_IS_CI: yes
@ -135,7 +175,24 @@ jobs:
        parallelism: 1
        steps:
            - checkout
-            - run: uv pip install -e .
+            - restore_cache:
+                  keys:
+                      - v0.7-repository_consistency-pip-{{ checksum "setup.py" }}
+                      - v0.7-repository_consistency-pip
+            - restore_cache:
+                  keys:
+                      - v0.7-repository_consistency-site-packages-{{ checksum "setup.py" }}
+                      - v0.7-repository_consistency-site-packages
+            - run: pip install --upgrade --upgrade-strategy eager pip
+            - run: pip install -U --upgrade-strategy eager .[all,quality]
+            - save_cache:
+                  key: v0.7-repository_consistency-pip-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.cache/pip'
+            - save_cache:
+                  key: v0.7-repository_consistency-site-packages-{{ checksum "setup.py" }}
+                  paths:
+                      - '~/.pyenv/versions/'
            - run:
                name: Show installed libraries and their versions
                command: pip freeze | tee installed.txt
@ -171,4 +228,4 @@ workflows:
            - check_circleci_user
            - check_code_quality
            - check_repository_consistency
-            - fetch_all_tests
+            - fetch_all_tests
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@ -19,7 +19,7 @@ import os
 import random
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
-import glob
+
 import yaml


@ -32,7 +32,7 @@ COMMON_ENV_VARIABLES = {
    "RUN_PT_FLAX_CROSS_TESTS": False,
 }
 # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
-COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
+COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "v": None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]


@ -41,6 +41,7 @@ class EmptyJob:

    def to_dict(self):
        return {
+            "working_directory": "~/transformers",
            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
            "steps":["checkout"],
        }
@ -50,15 +51,17 @@ class EmptyJob:
 class CircleCIJob:
    name: str
    additional_env: Dict[str, Any] = None
+    cache_name: str = None
+    cache_version: str = "0.8.2"
    docker_image: List[Dict[str, str]] = None
    install_steps: List[str] = None
    marker: Optional[str] = None
-    parallelism: Optional[int] = 0
+    parallelism: Optional[int] = 1
    pytest_num_workers: int = 12
    pytest_options: Dict[str, Any] = None
    resource_class: Optional[str] = "2xlarge"
    tests_to_run: Optional[List[str]] = None
-    num_test_files_per_worker: Optional[int] = 10
+    working_directory: str = "~/transformers"
    # This should be only used for doctest job!
    command_timeout: Optional[int] = None

@ -66,107 +69,227 @@ class CircleCIJob:
        # Deal with defaults for mutable attributes.
        if self.additional_env is None:
            self.additional_env = {}
+        if self.cache_name is None:
+            self.cache_name = self.name
        if self.docker_image is None:
            # Let's avoid changing the default list and make a copy.
            self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
-        else:
-            # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
-            print(os.environ.get("GIT_COMMIT_MESSAGE"))
-            if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
-                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
-            print(f"Using {self.docker_image} docker image")
        if self.install_steps is None:
-            self.install_steps = ["uv venv && uv pip install ."]
+            self.install_steps = []
        if self.pytest_options is None:
            self.pytest_options = {}
        if isinstance(self.tests_to_run, str):
            self.tests_to_run = [self.tests_to_run]
-        else:
-            test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt")
-            print("Looking for ", test_file)
-            if os.path.exists(test_file):
-                with open(test_file) as f:
-                    expanded_tests = f.read().strip().split("\n")
-                self.tests_to_run = expanded_tests
-                print("Found:", expanded_tests)
-            else:
-                self.tests_to_run = []
-                print("not Found")
+        if self.parallelism is None:
+            self.parallelism = 1

    def to_dict(self):
        env = COMMON_ENV_VARIABLES.copy()
        env.update(self.additional_env)

+        cache_branch_prefix = os.environ.get("CIRCLE_BRANCH", "pull")
+        if cache_branch_prefix != "main":
+            cache_branch_prefix = "pull"
+
        job = {
+            "working_directory": self.working_directory,
            "docker": self.docker_image,
            "environment": env,
        }
        if self.resource_class is not None:
            job["resource_class"] = self.resource_class
+        if self.parallelism is not None:
+            job["parallelism"] = self.parallelism
+        steps = [
+            "checkout",
+            {"attach_workspace": {"at": "~/transformers/test_preparation"}},
+            {
+                "restore_cache": {
+                    "keys": [
+                        # check the fully-matched cache first
+                        f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
+                        # try the partially-matched cache from `main`
+                        f"v{self.cache_version}-{self.cache_name}-main-pip-",
+                        # try the general partially-matched cache
+                        f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-",
+                    ]
+                }
+            },
+            {
+                "restore_cache": {
+                    "keys": [
+                        f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
+                        f"v{self.cache_version}-{self.cache_name}-main-site-packages-",
+                        f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-",
+                    ]
+                }
+            },
+        ]
+        steps.extend([{"run": l} for l in self.install_steps])
+        steps.extend([{"run": 'pip install "fsspec>=2023.5.0,<2023.10.0"'}])
+        steps.extend([{"run": "pip install pytest-subtests"}])
+        steps.append({"run": {"name": "Show installed libraries and their versions", "command": "pip freeze | tee installed.txt"}})
+        steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}})

        all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
        pytest_flags.append(
            f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
        )
-                # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
-        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
-        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
-        additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
-        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
-        steps = [
-            "checkout",
-            {"attach_workspace": {"at": "test_preparation"}},
-            {"run": "apt-get update && apt-get install -y curl"},
-            {"run": " && ".join(self.install_steps)},
-            {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
-            {"run": {
-                    "name": "Show installed libraries and their size",
-                    "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
-            },
-            {"run": {
-                "name": "Show installed libraries and their versions",
-                "command": """pip list --format=freeze | tee installed.txt || true"""}
-            },
-            {"run": {
-                "name": "Show biggest libraries",
-                "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
-            },
-            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
-            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
-                        {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
-                    "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
-                    }
-            },
-            {"run": {
-                "name": "Run tests",
-                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
-            },
-            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
-            {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
-            {"run": {"name": "Errors",                       "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
-            {"store_test_results": {"path": "test-results"}},
-            {"store_artifacts": {"path": "test-results/junit.xml"}},
-            {"store_artifacts": {"path": "reports"}},
-            {"store_artifacts": {"path": "tests.txt"}},
-            {"store_artifacts": {"path": "splitted_tests.txt"}},
-            {"store_artifacts": {"path": "installed.txt"}},
-        ]
-        if self.parallelism:
-            job["parallelism"] = parallel
+
+        steps.append({"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}})
+
+        test_command = ""
+        if self.command_timeout:
+            test_command = f"timeout {self.command_timeout} "
+        test_command += f"python -m pytest -rs --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+
+        if self.parallelism == 1:
+            if self.tests_to_run is None:
+                test_command += " << pipeline.parameters.tests_to_run >>"
+            else:
+                test_command += " " + " ".join(self.tests_to_run)
+        else:
+            # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
+            tests = self.tests_to_run
+            if tests is None:
+                folder = os.environ["test_preparation_dir"]
+                test_file = os.path.join(folder, "filtered_test_list.txt")
+                if os.path.exists(test_file):
+                    with open(test_file) as f:
+                        tests = f.read().split(" ")
+
+            # expand the test list
+            if tests == ["tests"]:
+                tests = [os.path.join("tests", x) for x in os.listdir("tests")]
+            expanded_tests = []
+            for test in tests:
+                if test.endswith(".py"):
+                    expanded_tests.append(test)
+                elif test == "tests/models":
+                    expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
+                elif test == "tests/pipelines":
+                    expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
+                else:
+                    expanded_tests.append(test)
+            # Avoid long tests always being collected together
+            random.shuffle(expanded_tests)
+            tests = " ".join(expanded_tests)
+
+            # Each executor to run ~10 tests
+            n_executors = max(len(tests) // 10, 1)
+            # Avoid empty test list on some executor(s) or launching too many executors
+            if n_executors > self.parallelism:
+                n_executors = self.parallelism
+            job["parallelism"] = n_executors
+
+            # Need to be newline separated for the command `circleci tests split` below
+            command = f'echo {tests} | tr " " "\\n" >> tests.txt'
+            steps.append({"run": {"name": "Get tests", "command": command}})
+
+            command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
+            steps.append({"run": {"name": "Split tests", "command": command}})
+
+            steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
+            steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
+
+            test_command = ""
+            if self.timeout:
+                test_command = f"timeout {self.timeout} "
+            test_command += f"python -m pytest -rs -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command += " $(cat splitted_tests.txt)"
+        if self.marker is not None:
+            test_command += f" -m {self.marker}"
+
+        if self.name == "pr_documentation_tests":
+            # can't use ` | tee tests_output.txt` as usual
+            test_command += " > tests_output.txt"
+            # Save the return code, so we can check if it is timeout in the next step.
+            test_command += '; touch "$?".txt'
+            # Never fail the test step for the doctest job. We will check the results in the next step, and fail that
+            # step instead if the actual test failures are found. This is to avoid the timeout being reported as test
+            # failure.
+            test_command = f"({test_command}) || true"
+        else:
+            test_command = f"({test_command} | tee tests_output.txt) || true"
+        steps.append({"run": {"name": "Run tests", "command": test_command}})
+
+        # Deal with errors
+        check_test_command = f'if [ -s reports/{self.job_name}/errors.txt ]; '
+        check_test_command += 'then echo "Some tests errored out!"; echo ""; '
+        check_test_command += f'cat reports/{self.job_name}/errors.txt; '
+        check_test_command += 'echo ""; echo ""; '
+
+        py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("ERROR ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
+        check_test_command += f"$(python3 -c '{py_command}'); "
+        check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
+
+        # Deal with failed tests
+        check_test_command += f'elif [ -s reports/{self.job_name}/failures_short.txt ]; '
+        check_test_command += 'then echo "Some tests failed!"; echo ""; '
+        check_test_command += f'cat reports/{self.job_name}/failures_short.txt; '
+        check_test_command += 'echo ""; echo ""; '
+
+        py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("FAILED ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
+        check_test_command += f"$(python3 -c '{py_command}'); "
+        check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
+
+        check_test_command += f'elif [ -s reports/{self.job_name}/stats.txt ]; then echo "All tests pass!"; '
+
+        # return code `124` means the previous (pytest run) step timed out
+        if self.name == "pr_documentation_tests":
+            check_test_command += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; '
+
+        check_test_command += 'else echo "other fatal error"; echo ""; exit -1; fi;'
+
+        steps.append({"run": {"name": "Check test results", "command": check_test_command}})
+
+        steps.append({"store_test_results": {"path": "test-results"}})
+
+        steps.append({"store_artifacts": {"path": "~/transformers/tests_output.txt"}})
+        steps.append({"store_artifacts": {"path": "~/transformers/reports"}})
+
+        # save cache at the end: so pytest step runs before cache saving and we can see results earlier
+        steps.append(
+            {
+                "save_cache": {
+                    "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}',
+                    "paths": ["~/.cache/pip"],
+                }
+            }
+        )
+        steps.append(
+            {
+                "save_cache": {
+                    "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}',
+                    "paths": ["~/.pyenv/versions/"],
+                }
+            }
+        )
+
        job["steps"] = steps
        return job

    @property
    def job_name(self):
-        return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
+        return self.name if "examples" in self.name else f"tests_{self.name}"


 # JOBS
 torch_and_tf_job = CircleCIJob(
    "torch_and_tf",
-    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
    additional_env={"RUN_PT_TF_CROSS_TESTS": True},
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng git-lfs cmake",
+        "git lfs install",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
+        "pip install -U --upgrade-strategy eager tensorflow_probability",
+        # Without --no-deps we can't pin dependency versions in the future
+        "pip install -U --upgrade-strategy eager --no-deps -e git+https://github.com/huggingface/accelerate@main#egg=accelerate",
+        # TODO: remove this one after fixing the dependency issue(s) above
+        "pip install -U --upgrade-strategy eager torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu",
+    ],
    marker="is_pt_tf_cross_test",
    pytest_options={"rA": None, "durations": 0},
 )
@ -175,120 +298,170 @@ torch_and_tf_job = CircleCIJob(
 torch_and_flax_job = CircleCIJob(
    "torch_and_flax",
    additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
+        "pip install -U --upgrade-strategy eager --upgrade pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
+        # Without --no-deps we can't pin dependency versions in the future
+        "pip install -U --upgrade-strategy eager --no-deps -e git+https://github.com/huggingface/accelerate@main#egg=accelerate",
+    ],
    marker="is_pt_flax_cross_test",
    pytest_options={"rA": None, "durations": 0},
 )

+
 torch_job = CircleCIJob(
    "torch",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    marker="not generate",
-    parallelism=6,
-    pytest_num_workers=8
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
+        # Without --no-deps we can't pin dependency versions in the future
+        "pip install -U --upgrade-strategy eager --no-deps -e git+https://github.com/huggingface/accelerate@main#egg=accelerate",
+    ],
+    parallelism=1,
+    pytest_num_workers=12,
 )

-generate_job = CircleCIJob(
-    "generate",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    marker="generate",
-    parallelism=6,
-    pytest_num_workers=8
-)
-
-tokenization_job = CircleCIJob(
-    "tokenization",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    parallelism=8,
-    pytest_num_workers=16
-)
-
-processor_job = CircleCIJob(
-    "processors",
-    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    parallelism=8,
-    pytest_num_workers=6
-)

 tf_job = CircleCIJob(
    "tf",
-    docker_image=[{"image":"huggingface/transformers-tf-light"}],
-    parallelism=6,
-    pytest_num_workers=16,
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng cmake",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
+        "pip install -U --upgrade-strategy eager tensorflow_probability",
+    ],
+    parallelism=1,
 )


 flax_job = CircleCIJob(
    "flax",
-    docker_image=[{"image":"huggingface/transformers-jax-light"}],
-    parallelism=6,
-    pytest_num_workers=16
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[flax,testing,sentencepiece,flax-speech,vision]",
+    ],
+    parallelism=1,
 )


 pipelines_torch_job = CircleCIJob(
    "pipelines_torch",
    additional_env={"RUN_PIPELINE_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-torch-light"}],
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video]",
+    ],
    marker="is_pipeline_test",
-    parallelism=4
+    pytest_num_workers=12,
 )


 pipelines_tf_job = CircleCIJob(
    "pipelines_tf",
    additional_env={"RUN_PIPELINE_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-tf-light"}],
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y cmake",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,tf-cpu,testing,sentencepiece,vision]",
+        "pip install -U --upgrade-strategy eager tensorflow_probability",
+    ],
    marker="is_pipeline_test",
-    parallelism=4
 )


 custom_tokenizers_job = CircleCIJob(
    "custom_tokenizers",
    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
-    docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y cmake",
+        {
+            "name": "install jumanpp",
+            "command":
+                "wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz\n"
+                "tar xvf jumanpp-2.0.0-rc3.tar.xz\n"
+                "mkdir jumanpp-2.0.0-rc3/bld\n"
+                "cd jumanpp-2.0.0-rc3/bld\n"
+                "sudo cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local\n"
+                "sudo make install\n",
+        },
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]",
+        "python -m unidic download",
+    ],
+    parallelism=None,
+    resource_class=None,
+    tests_to_run=[
+        "./tests/models/bert_japanese/test_tokenization_bert_japanese.py",
+        "./tests/models/openai/test_tokenization_openai.py",
+        "./tests/models/clip/test_tokenization_clip.py",
+    ],
 )


 examples_torch_job = CircleCIJob(
    "examples_torch",
    additional_env={"OMP_NUM_THREADS": 8},
-    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
-    # TODO @ArthurZucker remove this once docker is easier to build
-    install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
-    pytest_num_workers=8,
+    cache_name="torch_examples",
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,torch,sentencepiece,testing,torch-speech]",
+        "pip install -U --upgrade-strategy eager -r examples/pytorch/_tests_requirements.txt",
+        # Without --no-deps we can't pin dependency versions in the future
+        "pip install -U --upgrade-strategy eager --no-deps -e git+https://github.com/huggingface/accelerate@main#egg=accelerate",
+    ],
+    pytest_num_workers=1,
 )


 examples_tensorflow_job = CircleCIJob(
    "examples_tensorflow",
-    additional_env={"OMP_NUM_THREADS": 8},
-    docker_image=[{"image":"huggingface/transformers-examples-tf"}],
-    pytest_num_workers=16,
+    cache_name="tensorflow_examples",
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y cmake",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[sklearn,tensorflow,sentencepiece,testing]",
+        "pip install -U --upgrade-strategy eager -r examples/tensorflow/_tests_requirements.txt",
+    ],
+)
+
+
+examples_flax_job = CircleCIJob(
+    "examples_flax",
+    cache_name="flax_examples",
+    install_steps=[
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[flax,testing,sentencepiece]",
+        "pip install -U --upgrade-strategy eager -r examples/flax/_tests_requirements.txt",
+    ],
 )


 hub_job = CircleCIJob(
    "hub",
    additional_env={"HUGGINGFACE_CO_STAGING": True},
-    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    install_steps=[
-        'uv venv && uv pip install .',
+        "sudo apt-get -y update && sudo apt-get install git-lfs",
        'git config --global user.email "ci@dummy.com"',
        'git config --global user.name "ci"',
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[torch,sentencepiece,testing,vision]",
    ],
    marker="is_staging_test",
-    pytest_num_workers=2,
+    pytest_num_workers=1,
 )


 onnx_job = CircleCIJob(
    "onnx",
-    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
    install_steps=[
-        "uv venv",
-        "uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
+        "sudo apt-get -y update && sudo apt-get install -y cmake",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
    ],
    pytest_options={"k onnx": None},
    pytest_num_workers=1,
@ -297,18 +470,44 @@ onnx_job = CircleCIJob(

 exotic_models_job = CircleCIJob(
    "exotic_models",
-    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
-    pytest_num_workers=12,
-    parallelism=4,
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[torch,testing,vision]",
+        "pip install -U --upgrade-strategy eager torchvision",
+        "pip install -U --upgrade-strategy eager scipy",
+        "pip install -U --upgrade-strategy eager 'git+https://github.com/facebookresearch/detectron2.git'",
+        "sudo apt install tesseract-ocr",
+        "pip install -U --upgrade-strategy eager pytesseract",
+        "pip install --upgrade-strategy eager sentencepiece",
+        "pip install -U --upgrade-strategy eager natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels",
+        "pip install -U --upgrade-strategy eager python-Levenshtein",
+        "pip install -U --upgrade-strategy eager opencv-python",
+        "pip install -U --upgrade-strategy eager nltk",
+        "pip uninstall -y torch torchvision torchaudio && pip install -U --upgrade-strategy eager 'torch<2.2.0' 'torchvision<0.17' 'torchaudio<2.2.0'"
+    ],
+    tests_to_run=[
+        "tests/models/*layoutlmv*",
+        "tests/models/*nat",
+        "tests/models/deta",
+        "tests/models/udop",
+        "tests/models/nougat",
+    ],
+    pytest_num_workers=1,
    pytest_options={"durations": 100},
 )


 repo_utils_job = CircleCIJob(
    "repo_utils",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
-    pytest_num_workers=4,
+    install_steps=[
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager .[quality,testing,torch]",
+    ],
+    parallelism=None,
+    pytest_num_workers=1,
    resource_class="large",
+    tests_to_run="tests/repo_utils",
 )


@ -317,18 +516,39 @@ repo_utils_job = CircleCIJob(
 # the bash output redirection.)
 py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
 py_command = f"$(python3 -c '{py_command}')"
-command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
+command = f'echo "{py_command}" > pr_documentation_tests_temp.txt'
 doc_test_job = CircleCIJob(
    "pr_documentation_tests",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time ffmpeg",
+        "pip install --upgrade --upgrade-strategy eager pip",
+        "pip install -U --upgrade-strategy eager -e .[dev]",
+        # Without --no-deps we can't pin dependency versions in the future
+        "pip install -U --upgrade-strategy eager --no-deps -e git+https://github.com/huggingface/accelerate@main#egg=accelerate",
+        "pip install --upgrade --upgrade-strategy eager 'pytest<8.0.0' pytest-sugar",
+        "pip install -U --upgrade-strategy eager natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels",
+        "pip install -U --upgrade-strategy eager g2p-en",
+        # TODO: remove this one after fixing the dependency issue(s) above
+        "pip install -U --upgrade-strategy eager torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu",
+        "find -name __pycache__ -delete",
+        # The backslash is doubled so Python keeps it literally (`\*` is not a valid escape sequence);
+        # the shell still receives `\*.pyc`, i.e. an unexpanded glob handed to `find`.
+        "find . -name \\*.pyc -delete",
        # Add an empty file to keep the test step running correctly even no file is selected to be tested.
-        "uv venv && pip install .",
        "touch dummy.py",
-        command,
-        "cat pr_documentation_tests_temp.txt",
-        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
+        {
+            "name": "Get files to test",
+            "command": command,
+        },
+        {
+            "name": "Show information in `Get files to test`",
+            "command":
+                "cat pr_documentation_tests_temp.txt"
+        },
+        {
+            "name": "Get the last line in `pr_documentation_tests.txt`",
+            "command":
+                "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests.txt"
+        },
    ],
    tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
    pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
@ -336,37 +556,121 @@ doc_test_job = CircleCIJob(
    pytest_num_workers=1,
 )

-REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job] # fmt: skip
-EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
-PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
+REGULAR_TESTS = [
+    torch_and_tf_job,
+    torch_and_flax_job,
+    torch_job,
+    tf_job,
+    flax_job,
+    custom_tokenizers_job,
+    hub_job,
+    onnx_job,
+    exotic_models_job,
+]
+EXAMPLES_TESTS = [
+    examples_torch_job,
+    examples_tensorflow_job,
+    examples_flax_job,
+]
+PIPELINE_TESTS = [
+    pipelines_torch_job,
+    pipelines_tf_job,
+]
 REPO_UTIL_TESTS = [repo_utils_job]
 DOC_TESTS = [doc_test_job]
-ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
+

 def create_circleci_config(folder=None):
+    """Write `generated_config.yml` into `folder` based on the test lists found there.
+
+    The following files are looked up in `folder` (the current working directory when `folder` is None):
+    `test_list.txt` (enables the pipeline jobs), `filtered_test_list.txt` (enables the regular jobs and
+    provides the `tests_to_run` default), `examples_test_list.txt`, `doctest_list.txt` and
+    `test_repo_utils.txt`. A missing or empty file simply does not enable its jobs; when nothing is
+    enabled a single `EmptyJob` is emitted.
+
+    Side effects: sets the `test_preparation_dir` environment variable, and, when regular tests are
+    selected, writes `filtered_test_list_cross_tests.txt` next to the generated config.
+    """
    if folder is None:
        folder = os.getcwd()
+    # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
    os.environ["test_preparation_dir"] = folder
-    jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )]
-    print("The following jobs will be run ", jobs)
+    jobs = []
+    all_test_file = os.path.join(folder, "test_list.txt")
+    if os.path.exists(all_test_file):
+        with open(all_test_file) as f:
+            all_test_list = f.read()
+    else:
+        all_test_list = []
+    if len(all_test_list) > 0:
+        jobs.extend(PIPELINE_TESTS)
+
+    test_file = os.path.join(folder, "filtered_test_list.txt")
+    if os.path.exists(test_file):
+        with open(test_file) as f:
+            test_list = f.read()
+    else:
+        # Keep this a string: it is also the default of the string-typed `tests_to_run` parameter below.
+        test_list = ""
+    if len(test_list) > 0:
+        jobs.extend(REGULAR_TESTS)
+
+        extended_tests_to_run = set(test_list.split())
+        # Extend the test files for cross test jobs
+        for job in jobs:
+            if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]:
+                for test_path in copy.copy(extended_tests_to_run):
+                    dir_path, fn = os.path.split(test_path)
+                    if fn.startswith("test_modeling_tf_"):
+                        fn = fn.replace("test_modeling_tf_", "test_modeling_")
+                    elif fn.startswith("test_modeling_flax_"):
+                        fn = fn.replace("test_modeling_flax_", "test_modeling_")
+                    else:
+                        # `job_name` carries the "tests_" prefix (same names as in the membership check above),
+                        # so PyTorch test files are mapped to their TF / Flax counterparts here.
+                        if job.job_name == "tests_torch_and_tf":
+                            fn = fn.replace("test_modeling_", "test_modeling_tf_")
+                        elif job.job_name == "tests_torch_and_flax":
+                            fn = fn.replace("test_modeling_", "test_modeling_flax_")
+                    new_test_file = str(os.path.join(dir_path, fn))
+                    if os.path.isfile(new_test_file):
+                        if new_test_file not in extended_tests_to_run:
+                            extended_tests_to_run.add(new_test_file)
+        extended_tests_to_run = sorted(extended_tests_to_run)
+        for job in jobs:
+            if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]:
+                job.tests_to_run = extended_tests_to_run
+        fn = "filtered_test_list_cross_tests.txt"
+        f_path = os.path.join(folder, fn)
+        with open(f_path, "w") as fp:
+            fp.write(" ".join(extended_tests_to_run))
+
+    example_file = os.path.join(folder, "examples_test_list.txt")
+    if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
+        with open(example_file, "r", encoding="utf-8") as f:
+            example_tests = f.read()
+        for job in EXAMPLES_TESTS:
+            framework = job.name.replace("examples_", "").replace("torch", "pytorch")
+            if example_tests == "all":
+                job.tests_to_run = [f"examples/{framework}"]
+            else:
+                job.tests_to_run = [f for f in example_tests.split(" ") if f.startswith(f"examples/{framework}")]
+
+            if len(job.tests_to_run) > 0:
+                jobs.append(job)
+
+    doctest_file = os.path.join(folder, "doctest_list.txt")
+    if os.path.exists(doctest_file):
+        with open(doctest_file) as f:
+            doctest_list = f.read()
+    else:
+        doctest_list = []
+    if len(doctest_list) > 0:
+        jobs.extend(DOC_TESTS)
+
+    repo_util_file = os.path.join(folder, "test_repo_utils.txt")
+    if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
+        jobs.extend(REPO_UTIL_TESTS)

    if len(jobs) == 0:
        jobs = [EmptyJob()]
-    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
-    config = {
-        "version": "2.1",
-        "parameters": {
-            # Only used to accept the parameters from the trigger
-            "nightly": {"type": "boolean", "default": False},
-            "tests_to_run": {"type": "string", "default": ''},
-            **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
-            **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
-        },
-        "jobs" : {j.job_name: j.to_dict() for j in jobs},
-        "workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
+    config = {"version": "2.1"}
+    config["parameters"] = {
+        # Only used to accept the parameters from the trigger
+        "nightly": {"type": "boolean", "default": False},
+        "tests_to_run": {"type": "string", "default": test_list},
    }
+    config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
+    config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
    with open(os.path.join(folder, "generated_config.yml"), "w") as f:
-        f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))
+        f.write(yaml.dump(config, indent=2, width=1000000, sort_keys=False))


 if __name__ == "__main__":
--- a/.circleci/parse_test_outputs.py
+++ b/.circleci/parse_test_outputs.py
@ -1,70 +0,0 @@
-import re
-import argparse
-
-def parse_pytest_output(file_path):
-    skipped_tests = {}
-    skipped_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
-            if match:
-                skipped_count += 1
-                test_file, test_line, reason = match.groups()
-                skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
-    for k,v in sorted(skipped_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} skipped because: {k}")
-    print("Number of skipped tests:", skipped_count)
-
-def parse_pytest_failure_output(file_path):
-    failed_tests = {}
-    failed_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
-            if match:
-                failed_count += 1
-                _, error, reason = match.groups()
-                failed_tests[reason] = failed_tests.get(reason, []) + [error]
-    for k,v in sorted(failed_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} failed because `{v[0]}` -> {k}")
-    print("Number of failed tests:", failed_count)
-    if failed_count>0:
-        exit(1)
-
-def parse_pytest_errors_output(file_path):
-    print(file_path)
-    error_tests = {}
-    error_count = 0
-    with open(file_path, 'r') as file:
-        for line in file:
-            match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
-            if match:
-                error_count += 1
-                _, test_error, reason = match.groups()
-                error_tests[reason] = error_tests.get(reason, []) + [test_error]
-    for k,v in sorted(error_tests.items(), key=lambda x:len(x[1])):
-        print(f"{len(v):4} errored out because of `{v[0]}` -> {k}")
-    print("Number of errors:", error_count)
-    if error_count>0:
-        exit(1)
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--file", help="file to parse")
-    parser.add_argument("--skip", action="store_true", help="show skipped reasons")
-    parser.add_argument("--fail", action="store_true", help="show failed tests")
-    parser.add_argument("--errors", action="store_true", help="show failed tests")
-    args = parser.parse_args()
-
-    if args.skip:
-        parse_pytest_output(args.file)
-
-    if args.fail:
-        parse_pytest_failure_output(args.file)
-
-    if args.errors:
-        parse_pytest_errors_output(args.file)
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -1,17 +1,6 @@
 name: "\U0001F41B Bug Report"
 description: Submit a bug report to help us improve transformers
-labels: [ "bug" ]
 body:
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for taking the time to fill out this bug report! 🤗
-
-        Before you submit your bug report:
-
-          - If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug)
-          - Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue
-
  - type: textarea
    id: system-info
    attributes:
@ -28,50 +17,50 @@ body:
      description: |
        Your issue will be replied to more quickly if you can figure out the right person to tag with @
        If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
-
+        
        All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
        a core maintainer will ping the right person.
-
+        
        Please tag fewer than 3 people.
-
+        
        Models:

-          - text models: @ArthurZucker
-          - vision models: @amyeroberts, @qubvel
-          - speech models: @ylacombe, @eustlb
+          - text models: @ArthurZucker and @younesbelkada
+          - vision models: @amyeroberts
+          - speech models: @sanchit-gandhi
          - graph models: @clefourrier
-
+        
        Library:
-
+        
          - flax: @sanchit-gandhi
-          - generate: @zucchini-nlp (visual-language models) or @gante (all others)
-          - pipelines: @Rocketknight1
+          - generate: @gante
+          - pipelines: @Narsil
          - tensorflow: @gante and @Rocketknight1
-          - tokenizers: @ArthurZucker and @itazap
-          - trainer: @muellerzr @SunMarc
-
+          - tokenizers: @ArthurZucker
+          - trainer: @muellerzr and @pacman100
+        
        Integrations:
-
-          - deepspeed: HF Trainer/Accelerate: @muellerzr
+        
+          - deepspeed: HF Trainer/Accelerate: @pacman100
          - ray/raytune: @richardliaw, @amogkam
          - Big Model Inference: @SunMarc
-          - quantization (bitsandbytes, autogpt): @SunMarc
-
+          - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada
+        
        Documentation: @stevhliu
-
+        
        Model hub:

          - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
-
+        
        HF projects:
-
+        
          - accelerate: [different repo](https://github.com/huggingface/accelerate)
          - datasets: [different repo](https://github.com/huggingface/datasets)
          - diffusers: [different repo](https://github.com/huggingface/diffusers)
          - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
-
+        
        Maintained examples (not research project or legacy):
-
+        
          - Flax: @sanchit-gandhi
          - PyTorch: See Models above and tag the person corresponding to the modality of the example.
          - TensorFlow: @Rocketknight1
@ -112,11 +101,11 @@ body:

      placeholder: |
        Steps to reproduce the behavior:
-
+          
          1.
          2.
          3.
-
+          

  - type: textarea
    id: expected-behavior
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@ -1,6 +1,6 @@
 name: "\U0001F680 Feature request"
 description: Submit a proposal/request for a new transformers feature
-labels: [ "Feature request" ]
+labels: [ "feature" ]
 body:
  - type: textarea
    id: feature-request
@ -19,7 +19,7 @@ body:
      label: Motivation
      description: |
        Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
-
+        

  - type: textarea
    id: contribution
--- a/.github/ISSUE_TEMPLATE/i18n.md
+++ b/.github/ISSUE_TEMPLATE/i18n.md
@ -34,7 +34,7 @@ Some notes:

 ## Tutorial section
 - [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ]  [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
+- [ ]  [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md)
 - [ ]  [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
 - [ ]  [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
 - [ ]  [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -39,29 +39,28 @@ members/contributors who may be interested in your PR.

 Models:

- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @ylacombe, @eustlb
+- text models: @ArthurZucker and @younesbelkada
+- vision models: @amyeroberts
+- speech models: @sanchit-gandhi
 - graph models: @clefourrier

 Library:

 - flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Rocketknight1
+- generate: @gante
+- pipelines: @Narsil
 - tensorflow: @gante and @Rocketknight1
 - tokenizers: @ArthurZucker
- trainer: @muellerzr and @SunMarc
- chat templates: @Rocketknight1
+- trainer: @muellerzr and @pacman100

 Integrations:

- deepspeed: HF Trainer/Accelerate: @muellerzr
+- deepspeed: HF Trainer/Accelerate: @pacman100
 - ray/raytune: @richardliaw, @amogkam
 - Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc
+- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada

-Documentation: @stevhliu
+Documentation: @stevhliu and @MKhalusova

 HF projects:

--- a/.github/actions/post-slack/action.yml
+++ b/.github/actions/post-slack/action.yml
@ -0,0 +1,79 @@
+name: Send message to slack
+
+description: 'Send results to slack'
+author: 'Hugging Face'
+inputs:
+  slack_channel:
+    description: 'Slack channel id, channel name, or user id to post the message to'
+    required: true
+    type: string
+  title:
+    description: 'Text used as the message text and as its header block'
+    required: true
+    type: string
+  status:
+    description: 'Job status; "success" selects the passing message, any other value the failure message'
+    required: true
+    type: string
+  slack_token:
+    description: 'Token exposed to the Slack step as SLACK_BOT_TOKEN'
+    required: true
+    type: string
+
+runs:
+  using: "composite"
+  steps:
+    - name: Create content to post
+      id: create-message
+      # The input is handed to the script through an environment variable instead of being
+      # interpolated into the script text, so its content can never be parsed as shell code.
+      env:
+        JOB_STATUS: ${{ inputs.status }}
+      run: |
+        if [ "$JOB_STATUS" == "success" ]; then
+          echo STATUS_MESSAGE='🟢 Tests are passing!' >> "$GITHUB_ENV"
+        else
+          echo STATUS_MESSAGE='🔴 Tests failed! Please check the GitHub action link below' >> "$GITHUB_ENV"
+        fi
+      shell: bash
+
+    # Posts the message built above for every status, not only for cancelled runs.
+    - name: Post Canceled results Slack channel
+      id: post-slack
+      uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
+      with:
+        # Slack channel id, channel name, or user id to post message.
+        # See also: https://api.slack.com/methods/chat.postMessage#channels
+        channel-id: ${{ inputs.slack_channel }}
+        # For posting a rich message using Block Kit
+        # NOTE(review): `inputs.title` is pasted verbatim between JSON double quotes below, so a
+        # title containing `"` or `\` would produce an invalid payload — confirm callers avoid this.
+        payload: |
+          {
+            "text": "${{ inputs.title }}",
+            "blocks": [
+              {
+                "type": "header",
+                "text": {
+                    "type": "plain_text",
+                    "text": "${{ inputs.title }}"
+                }
+              },
+              {
+                "type": "section",
+                "text": {
+                  "type": "mrkdwn",
+                  "text": "${{ env.STATUS_MESSAGE }}"
+                }
+              },
+              {
+                "type": "section",
+                "text": {"type": "mrkdwn", "text": "*Click the button for more details about the commit*"},
+                "accessory": {
+                    "type": "button",
+                    "text": {"type": "plain_text", "text": "Check Commit results"},
+                    "url": "${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
+                }
+              },
+              {
+                "type": "section",
+                "text": {"type": "mrkdwn", "text": "*Click here for more details about the action ran*"},
+                "accessory": {
+                    "type": "button",
+                    "text": {"type": "plain_text", "text": "Check Action results"},
+                    "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+                }
+              }
+            ]
+          }
+      env:
+        SLACK_BOT_TOKEN: ${{ inputs.slack_token }}
--- a/.github/workflows/add-model-like.yml
+++ b/.github/workflows/add-model-like.yml
@ -23,7 +23,7 @@ jobs:
          sudo apt -y update && sudo apt install -y libsndfile1-dev

      - name: Load cached virtual environment
-        uses: actions/cache@v4
+        uses: actions/cache@v2
        id: cache
        with:
          path: ~/venv/
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -1,42 +0,0 @@
-name: Self-hosted runner (benchmark)
-
-on:
-  schedule:
-    - cron: "17 2 * * *"  # every day at 02:17 (GitHub evaluates schedules in UTC)
-  workflow_call:  # lets other workflows invoke this one as a reusable workflow
-
-env:
-  HF_HOME: /mnt/cache  # same path the container mounts the host cache on (see `options` below)
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-
-# Single GPU job: move the image's checkout to the triggering commit, reinstall it, then run the benchmark for the event.
-jobs:
-  benchmark:
-    name: Benchmark
-    runs-on: [single-gpu, nvidia-gpu, a10, ci]
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Update clone  # the image already contains a clone at /transformers
-        working-directory: /transformers
-        run: |
-          git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Benchmark (daily)  # results are stored under a folder named after today's date
-        if: github.event_name == 'schedule'
-        working-directory: /transformers
-        run: |
-          python3 -m pip install "optimum-benchmark>=0.3.0"  # quoted: a bare > would be parsed by the shell as a redirection
-          HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
-
-      - name: Benchmark (merged to main event)  # no `push` trigger above, so this only matches when a push-triggered workflow calls this one
-        if: github.event_name == 'push' && github.ref_name == 'main'
-        working-directory: /transformers
-        run: |
-          python3 -m pip install "optimum-benchmark>=0.3.0"  # quoted: a bare > would be parsed by the shell as a redirection
-          HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
--- a/.github/workflows/build-ci-docker-images.yml
+++ b/.github/workflows/build-ci-docker-images.yml
@ -1,77 +0,0 @@
-name: Build pr ci-docker
-
-on:
-  push:
-    branches:
-      - push-ci-image # for now let's only build on this branch
-  repository_dispatch:
-  workflow_call:
-    inputs:
-      image_postfix:  # NOTE(review): required, but never referenced in this workflow — confirm it is still needed
-        required: true
-        type: string
-  schedule:
-    - cron: "6 0 * * *"  # every day at 00:06 (GitHub evaluates schedules in UTC)
-
-# The group is the workflow name only, so a newer run cancels whichever run is still in progress.
-concurrency:
-  group: ${{ github.workflow }}
-  cancel-in-progress: true
-
-jobs:
-  build:
-    runs-on: ubuntu-22.04
-    # Runs for commits tagged [build-ci-image] / [push-ci-image] or on the schedule. `!cancelled()` must stay unquoted: quoted, it is just a non-empty (always truthy) string.
-    if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && !cancelled() || github.event_name == 'schedule' }}
-
-    strategy:
-      matrix:
-        file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
-    continue-on-error: true
-
-    steps:
-      -
-        name: Set tag  # the expression in the `if` below is rendered to the literal word true/false before the shell runs
-        run: |
-              if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
-                  echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
-                  echo "setting it to DEV!"
-              else
-                  echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
-
-              fi
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Check out code
-        uses: actions/checkout@v4
-      -
-        name: Login to DockerHub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
-        name: Build ${{ matrix.file }}.dockerfile
-        uses: docker/build-push-action@v5
-        with:
-          context: ./docker
-          build-args: |
-            REF=${{ github.sha }}
-          file: "./docker/${{ matrix.file }}.dockerfile"
-          push: ${{ contains(github.event.head_commit.message, 'ci-image]') ||  github.event_name == 'schedule' }}  # 'ci-image]' matches both commit tags
-          tags: ${{ env.TAG }}  # written to GITHUB_ENV by the "Set tag" step
-
-  notify:  # same gate as `build`; `!cancelled()` is a real status check, not a quoted string
-    runs-on: ubuntu-22.04
-    if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && !cancelled() || github.event_name == 'schedule' }}
-    steps:
-      - name: Post to Slack  # only for [push-ci-image] commits, never for scheduled runs
-        if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: "#transformers-ci-circleci-images"
-          title: 🤗 New docker images for CircleCI are pushed.
-          status: ${{ job.status }}  # NOTE(review): status of this `notify` job itself; there is no `needs: build`, so it does not reflect the build result — confirm intent
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@ -57,19 +57,20 @@ jobs:
          push: true
          tags: huggingface/transformers-all-latest-gpu-push-ci

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-torch-deepspeed-docker:
    name: "Latest PyTorch + DeepSpeed"
    runs-on: [intel-cpu, 8-cpu, ci]
    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@ -92,20 +93,21 @@ jobs:
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
  latest-torch-deepspeed-docker-for-push-ci-daily-build:
    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
    runs-on: [intel-cpu, 8-cpu, ci]
    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@ -132,15 +134,6 @@ jobs:
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  doc-builder:
    name: "Doc builder"
    # Push CI doesn't need this image
@ -167,21 +160,22 @@ jobs:
          push: true
          tags: huggingface/transformers-doc-builder

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-doc-builder docker build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-pytorch:
    name: "Latest PyTorch [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on: [intel-cpu, 8-cpu, ci]
    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@ -204,15 +198,6 @@ jobs:
          push: true
          tags: huggingface/transformers-pytorch-gpu

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-pytorch-amd:
    name: "Latest PyTorch (AMD) [dev]"
    runs-on: [intel-cpu, 8-cpu, ci]
@ -252,15 +237,6 @@ jobs:
          push: true
          tags: huggingface/transformers-pytorch-amd-gpu-push-ci

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-tensorflow:
    name: "Latest TensorFlow [dev]"
    # Push CI doesn't need this image
@ -289,15 +265,6 @@ jobs:
          push: true
          tags: huggingface/transformers-tensorflow-gpu

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-pytorch-deepspeed-amd:
    name: "PyTorch + DeepSpeed (AMD) [dev]"
    runs-on: [intel-cpu, 8-cpu, ci]
@ -337,15 +304,6 @@ jobs:
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci

-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
-
  latest-quantization-torch-docker:
    name: "Latest Pytorch + Quantization [dev]"
     # Push CI doesn't need this image
@ -372,13 +330,4 @@ jobs:
          build-args: |
            REF=main
          push: true
-          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
-
-      - name: Post to Slack
-        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
-        with:
-          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-quantization-latest-gpu build 
-          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
--- a/.github/workflows/build-nightly-ci-docker-images.yml
+++ b/.github/workflows/build-nightly-ci-docker-images.yml
@ -13,8 +13,18 @@ concurrency:
 jobs:
  latest-with-torch-nightly-docker:
    name: "Nightly PyTorch + Stable TensorFlow"
-    runs-on: [intel-cpu, 8-cpu, ci]
+    runs-on: ubuntu-22.04
    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
@ -40,8 +50,18 @@ jobs:

  nightly-torch-deepspeed-docker:
    name: "Nightly PyTorch + DeepSpeed"
-    runs-on: [intel-cpu, 8-cpu, ci]
+    runs-on: ubuntu-22.04
    steps:
+      - name: Cleanup disk
+        run: |
+          sudo ls -l /usr/local/lib/
+          sudo ls -l /usr/share/
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo du -sh /usr/local/lib/
+          sudo du -sh /usr/share/
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
@ -16,7 +16,7 @@ jobs:
      fail-fast: false
      matrix:
        version: ["1.13", "1.12", "1.11"]
-    runs-on: [intel-cpu, 8-cpu, ci]
+    runs-on: ubuntu-22.04
    steps:
      -
        name: Set up Docker Buildx
@ -60,7 +60,7 @@ jobs:
      fail-fast: false
      matrix:
        version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
-    runs-on: [intel-cpu, 8-cpu, ci]
+    runs-on: ubuntu-22.04
    steps:
      -
        name: Set up Docker Buildx
--- a/.github/workflows/check_tiny_models.yml
+++ b/.github/workflows/check_tiny_models.yml
@ -23,7 +23,7 @@ jobs:

      - uses: actions/checkout@v4
      - name: Set up Python 3.8
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
        with:
          # Semantic version range syntax or exact version of a Python version
          python-version: '3.8'
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@ -12,12 +12,6 @@ on:
      slice_id:
        required: true
        type: number
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string

 env:
  HF_HOME: /mnt/cache
@ -37,13 +31,12 @@ jobs:
  run_models_gpu:
    name: " "
    strategy:
-      max-parallel: 8
      fail-fast: false
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
-    runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
-      image: ${{ inputs.docker }}
+      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Echo input and matrix info
@ -72,18 +65,6 @@ jobs:
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install -U datasets
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
@ -99,7 +80,7 @@ jobs:

      - name: Run all tests on GPU
        working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+        run: python3 -m pytest -rs -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
--- a/.github/workflows/model_jobs_amd.yml
+++ b/.github/workflows/model_jobs_amd.yml
@ -1,129 +0,0 @@
-name: model jobs
-
-on:
-  workflow_call:
-    inputs:
-      folder_slices:  # JSON string; the job decodes it with fromJson and indexes the result by slice_id
-        required: true
-        type: string
-      machine_type:  # first runner label of the job; also prefixes the report and artifact names
-        required: true
-        type: string
-      slice_id:  # index into the decoded folder_slices; the selected entry becomes the job matrix
-        required: true
-        type: number
-      runner:  # last runner label in the job's runs-on list
-        required: true
-        type: string
-      docker:  # container image the job runs in
-        required: true
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
-  # This token was created under the bot account `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  RUN_PT_TF_CROSS_TESTS: 1
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  run_models_gpu:
-    name: " "  # NOTE(review): blank display name, presumably to keep matrix entries short in the UI — confirm
-    strategy:
-      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}  # one matrix entry per folder in the selected slice
-    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
-    container:
-      image: ${{ inputs.docker }}
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ inputs.folder_slices }}"
-          echo "${{ matrix.folders }}"
-          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders="${{ matrix.folders }}"  # quoted so the substituted text is always taken as one literal value
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"
-
-      - name: Update clone  # the image already contains a clone at /transformers
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install -U datasets
-
-      - name: Update / Install some packages (for Past CI)  # additionally, for past PyTorch images only
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
-      - name: ROCM-SMI
-        run: |
-          rocm-smi
-
-      - name: ROCM-INFO
-        run: |
-          rocminfo  | grep "Agent" -A 14
-
-      - name: Show ROCR environment
-        run: |
-          echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU  # deselects tests marked `not_device_test`
-        working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}  -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Run test  # NOTE(review): only creates the report folder with a placeholder hello.txt — looks like leftover scaffolding; confirm
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports  # uses the `/`-free name exported by the "Echo folder" step
-          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/.github/workflows/push-important-models.yml
+++ b/.github/workflows/push-important-models.yml
@ -5,6 +5,7 @@ on:
    branches: [ main ]

 env:
+  IS_GITHUB_CI: "1"
  OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache 
@ -85,7 +86,7 @@ jobs:
      - name: Run FA2 tests
        id: run_fa2_tests
        run:
-          pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
+          pytest -rs -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
      
      - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
        if: ${{ always() }}
@ -96,7 +97,7 @@ jobs:
      
      - name: Post to Slack
        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        uses: ./.github/actions/post-slack
        with:
          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
          title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
@ -107,7 +108,7 @@ jobs:
        id: run_integration_tests
        if: always()
        run:
-          pytest -rsfE -k "IntegrationTest"  --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
+          pytest -rs -k "IntegrationTest"  --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
      
      - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
        if: ${{ always() }}
@ -118,7 +119,7 @@ jobs:

      - name: Post to Slack
        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main 
+        uses: ./.github/actions/post-slack
        with:
          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
          title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
@ -133,10 +134,3 @@ jobs:
          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
          waitForSSH: true
-
-  benchmark:
-    name: Benchmark workflow
-    needs: get_modified_models
-    if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
-    uses: ./.github/workflows/benchmark.yml
-    secrets: inherit
--- a/.github/workflows/release-conda.yml
+++ b/.github/workflows/release-conda.yml
@ -19,7 +19,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

      - name: Install miniconda
        uses: conda-incubator/setup-miniconda@v2
--- a/.github/workflows/remind_slow_ci.yml
+++ b/.github/workflows/remind_slow_ci.yml
@ -1,19 +0,0 @@
-name: Remind slow CI  # must be unique: the concurrency group below is keyed on this name
-
-on:
-  pull_request_target:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}  # one group per workflow name and PR head branch
-  cancel-in-progress: true
-
-jobs:
-  remind:
-    name: remind
-    runs-on: ubuntu-22.04
-    env:
-      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): not read by the only step below — confirm it is needed
-    steps:
-    - name: Install requirements  # NOTE(review): placeholder step; it only prints a greeting
-      run: |
-        echo "Bonjour"
--- a/.github/workflows/self-new-model-pr-caller.yml
+++ b/.github/workflows/self-new-model-pr-caller.yml
@ -4,11 +4,6 @@ on:
  pull_request:
    paths:
      - "src/transformers/models/*/modeling_*.py"
-      - "tests/**/test_*.py"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true

 env:
  HF_HOME: /mnt/cache
@ -25,48 +20,33 @@ env:
  CUDA_VISIBLE_DEVICES: 0,1

 jobs:
-  find_models_to_run:
+  check_for_new_model:
      runs-on: ubuntu-22.04
-      name: Find models to run slow tests
-      # Triggered only if the required label `run-slow` is added
-      if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
+      name: Check if a PR is a new model PR
      outputs:
-        models: ${{ steps.models_to_run.outputs.models }}
+        new_model: ${{ steps.check_new_model.outputs.new_model }}
      steps:
        - uses: actions/checkout@v4
          with:
            fetch-depth: "0"
-            ref: ${{ github.event.pull_request.head.sha }}

-        - name: Get commit message
+        - name: Check if there is a new model
+          id: check_new_model
          run: |
-            echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV
-
-        - name: Get models to run slow tests
-          run: |
-            echo "${{ env.commit_message }}"
            python -m pip install GitPython
-            python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" | tee output.txt
-            echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-
-        - name: Models to run slow tests
-          id: models_to_run
-          run: |
-            echo "${{ env.models }}"
-            echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
+            echo "new_model=$(python utils/check_if_new_model_added.py | tail -n 1)" >> $GITHUB_OUTPUT

  run_models_gpu:
-      name: Run all tests for the model
-      # Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run
-      # (either a new model PR or via a commit message)
-      if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
-      needs: find_models_to_run
+      name: Run all tests for the new model
+      # Triggered if it is a new model PR and the required label is added
+      if: ${{ needs.check_for_new_model.outputs.new_model != '' && contains(github.event.pull_request.labels.*.name, 'single-model-run-slow') }}
+      needs: check_for_new_model
      strategy:
        fail-fast: false
        matrix:
-          folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
+          folders: ["${{ needs.check_for_new_model.outputs.new_model }}"]
          machine_type: [single-gpu, multi-gpu]
-      runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
+      runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
      container:
        image: huggingface/transformers-all-latest-gpu
        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -89,7 +69,7 @@ jobs:

      - name: Update clone
        working-directory: /transformers
-        run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
+        run: git fetch && git checkout ${{ github.event.pull_request.head.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@ -110,10 +90,7 @@ jobs:

      - name: Run all tests on GPU
        working-directory: /transformers
-        run: |
-          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
-          echo $CUDA_VISIBLE_DEVICES
-          python3 -m pytest -v -rsfE --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+        run: python3 -m pytest -v -rs --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
--- a/.github/workflows/self-nightly-caller.yml
+++ b/.github/workflows/self-nightly-caller.yml
@ -1,43 +0,0 @@
-name: Self-hosted runner (nightly-ci)
-
-# Nightly CI entry point: build the nightly Docker images, then call self-scheduled.yml twice (models, DeepSpeed).
-on:
-  schedule:
-    - cron: "17 2 * * *"
-  push:
-    branches:
-      - "run_nightly_ci*"
-  repository_dispatch:
-
-jobs:
-  build_nightly_ci_images:
-    name: Build Nightly CI Docker Images
-    if: github.event_name == 'schedule' || (github.event_name == 'push' && startsWith(github.ref_name, 'run_nightly_ci'))
-    uses: ./.github/workflows/build-nightly-ci-docker-images.yml
-    secrets: inherit
-
-  model-ci:
-    name: Model CI
-    needs: build_nightly_ci_images
-    uses: ./.github/workflows/self-scheduled.yml
-    secrets: inherit
-    with:
-      ci_event: Nightly CI
-      docker: huggingface/transformers-all-latest-torch-nightly-gpu
-      job: run_models_gpu
-      runner: ci
-      slack_report_channel: "#transformers-ci-past-future"
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    needs: build_nightly_ci_images
-    uses: ./.github/workflows/self-scheduled.yml
-    secrets: inherit
-    with:
-      ci_event: Nightly CI
-      # DeepSpeed nightly build is exercised against the latest released torch
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      job: run_torch_cuda_extensions_gpu
-      runner: ci
-      slack_report_channel: "#transformers-ci-past-future"
-      working-directory-prefix: /workspace
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@ -2,30 +2,32 @@ name: Self-hosted runner (nightly-past-ci-caller)

 on:
  schedule:
-    - cron: "17 2,14 * * *"
+    # 02:17 UTC every Sunday and Thursday
+
+    - cron: "17 2 * * 0,4"
  push:
    branches:
+      - run_nightly_ci*
      - run_past_ci*

 jobs:
-  get_number:
-    name: Get number
-    runs-on: ubuntu-22.04
-    outputs:
-      run_number: ${{ steps.get_number.outputs.run_number }}
-    steps:
-      - name: Get number
-        id: get_number
-        run: |
-          echo "${{ github.run_number }}"
-          echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
-          echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
+  build_nightly_ci_images:
+    name: Build Nightly CI Docker Images
+    if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
+    uses: ./.github/workflows/build-nightly-ci-docker-images.yml
+    secrets: inherit
+
+  run_nightly_ci:
+    name: Nightly CI
+    needs: [build_nightly_ci_images]
+    uses: ./.github/workflows/self-nightly-scheduled.yml
+    secrets: inherit

  run_past_ci_pytorch_1-13:
    name: PyTorch 1.13
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 0 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    needs: [run_nightly_ci]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: pytorch
      version: "1.13"
@ -34,9 +36,9 @@ jobs:

  run_past_ci_pytorch_1-12:
    name: PyTorch 1.12
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 1 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    needs: [run_past_ci_pytorch_1-13]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: pytorch
      version: "1.12"
@ -45,9 +47,9 @@ jobs:

  run_past_ci_pytorch_1-11:
    name: PyTorch 1.11
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 2 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    needs: [run_past_ci_pytorch_1-12]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: pytorch
      version: "1.11"
@ -56,9 +58,9 @@ jobs:

  run_past_ci_tensorflow_2-11:
    name: TensorFlow 2.11
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 3 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_pytorch_1-11]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.11"
@ -67,9 +69,9 @@ jobs:

  run_past_ci_tensorflow_2-10:
    name: TensorFlow 2.10
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 4 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-11]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.10"
@ -78,9 +80,9 @@ jobs:

  run_past_ci_tensorflow_2-9:
    name: TensorFlow 2.9
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 5 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-10]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.9"
@ -89,9 +91,9 @@ jobs:

  run_past_ci_tensorflow_2-8:
    name: TensorFlow 2.8
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 6 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-9]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.8"
@ -100,9 +102,9 @@ jobs:

  run_past_ci_tensorflow_2-7:
    name: TensorFlow 2.7
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 7 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-8]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.7"
@ -111,9 +113,9 @@ jobs:

  run_past_ci_tensorflow_2-6:
    name: TensorFlow 2.6
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 8 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-7]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.6"
@ -122,9 +124,9 @@ jobs:

  run_past_ci_tensorflow_2-5:
    name: TensorFlow 2.5
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 9 &&  (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    uses: ./.github/workflows/self-past-caller.yml
+    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
+    needs: [run_past_ci_tensorflow_2-6]
+    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
      version: "2.5"
--- a/.github/workflows/self-nightly-scheduled.yml
+++ b/.github/workflows/self-nightly-scheduled.yml
@ -0,0 +1,290 @@
+name: Self-hosted runner (nightly-ci)
+
+# Note that each job's dependencies go into a corresponding docker file.
+#
+# For example for `run_torch_cuda_extensions_gpu` the docker image is
+# `huggingface/transformers-pytorch-deepspeed-nightly-gpu`, which is expected at
+# `docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile`
+
+on:
+  repository_dispatch:
+  workflow_call:
+
+# Workflow-level environment variables.
+# Boolean-like and numeric values are quoted so they are unambiguously strings.
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: "yes"
+  OMP_NUM_THREADS: "8"
+  MKL_NUM_THREADS: "8"
+  RUN_SLOW: "yes"
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: "true"
+  RUN_PT_TF_CROSS_TESTS: "1"
+  CUDA_VISIBLE_DEVICES: "0,1"
+
+jobs:
+  setup:
+    name: Setup
+    strategy:
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-all-latest-torch-nightly-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+
+      - name: Cleanup
+        working-directory: /transformers
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - id: set-matrix
+        name: Identify models to test
+        working-directory: /transformers/tests
+        run: |
+          echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+  run_tests_single_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [single-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-all-latest-torch-nightly-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  run_tests_multi_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-all-latest-torch-nightly-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  run_torch_cuda_extensions_gpu:
+    name: Torch CUDA extension tests
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    needs: setup
+    container:
+      image: huggingface/transformers-pytorch-deepspeed-nightly-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /workspace/transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /workspace/transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Remove cached torch extensions
+        run: rm -rf /github/home/.cache/torch_extensions/
+
+      # To avoid unknown test failures
+      - name: Pre build DeepSpeed *again*
+        working-directory: /workspace
+        run: |
+          python3 -m pip uninstall -y deepspeed
+          rm -rf DeepSpeed
+          git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
+          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      # Print environment details with `python3`, the interpreter this job installs packages with
+      # (`python3 -m pip`), so the report describes the interpreter that was actually set up.
+      - name: Environment
+        working-directory: /workspace/transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /workspace/transformers
+        run: pip freeze
+
+      # Run the DeepSpeed and extended test suites and write the reports for this machine type.
+      # `python3` is used so pytest runs under the interpreter that the `python3 -m pip install`
+      # steps of this job installed transformers and DeepSpeed into.
+      - name: Run all tests on GPU
+        working-directory: /workspace/transformers
+        run: |
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-22.04
+    if: always()
+    needs: [
+      setup,
+      run_tests_single_gpu,
+      run_tests_multi_gpu,
+      run_torch_cuda_extensions_gpu
+    ]
+    steps:
+      - name: Preliminary job status
+        shell: bash
+        # Print the result of the `setup` job so its outcome is visible in this job's log
+        run: |
+          echo "Setup status: ${{ needs.setup.result }}"
+
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v4
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+          CI_EVENT: Nightly CI
+          SETUP_STATUS: ${{ needs.setup.result }}
+        # We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` is required to change
+        # `models/bert` to `models_bert`, as the artifact names use `_` instead of `/`.
+        run: |
+          pip install slack_sdk
+          pip show slack_sdk
+          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
+
+
+      # Delete the per-job report artifacts (names starting with `single-` or `multi-`).
+      # They are uploaded with `actions/upload-artifact@v4`; `delete-artifact@v2` targets the older
+      # artifact backend, so use a release that is compatible with v4 artifacts.
+      # NOTE(review): newer delete-artifact releases delete through the API with a token; confirm
+      # the job's token is allowed to delete artifacts.
+      - uses: geekyeggo/delete-artifact@v5
+        with:
+          name: |
+              single-*
+              multi-*
--- a/.github/workflows/self-past-caller.yml
+++ b/.github/workflows/self-past-caller.yml
@ -1,40 +0,0 @@
-name: Self-hosted runner (past-ci)
-
-
-on:
-  workflow_call:
-    inputs:
-      framework:
-        required: true
-        type: string
-      version:
-        required: true
-        type: string
-      # Use this to control the commit to test against
-      sha:
-        default: 'main'
-        required: false
-        type: string
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: past-ci
-      docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
-      ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-past-future"
-      runner: past-ci
-      docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
-      ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
-    secrets: inherit
--- a/.github/workflows/self-past.yml
+++ b/.github/workflows/self-past.yml
@ -0,0 +1,357 @@
+name: Self-hosted runner (past-ci)
+
+# Note that each job's dependencies go into a corresponding docker file.
+#
+# For example for `run_torch_cuda_extensions_gpu` the docker image is
+# `huggingface/transformers-<framework>-past-<version>-gpu`, where `<framework>`
+# and `<version>` are the `framework` and `version` inputs of this workflow
+
+on:
+  workflow_call:
+    inputs:
+      framework:
+        required: true
+        type: string
+      version:
+        required: true
+        type: string
+      # Use this to control the commit to test against
+      sha:
+        default: 'main'
+        required: false
+        type: string
+
+# Workflow-level environment variables.
+# Boolean-like and numeric values are quoted so they are unambiguously strings.
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: "yes"
+  OMP_NUM_THREADS: "8"
+  MKL_NUM_THREADS: "8"
+  RUN_SLOW: "yes"
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: "true"
+  RUN_PT_TF_CROSS_TESTS: "1"
+  CUDA_VISIBLE_DEVICES: "0,1"
+
+jobs:
+  setup:
+    name: Setup
+    strategy:
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.sha }}
+
+      - name: Cleanup
+        working-directory: /transformers
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - id: set-matrix
+        working-directory: /transformers
+        name: Identify models to test
+        run: |
+          cd tests
+          echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
+
+  run_tests_single_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [single-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Update some packages
+        working-directory: /transformers
+        run: python3 -m pip install -U datasets
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Install
+        if: inputs.framework == 'pytorch'
+        working-directory: /transformers
+        run: |
+          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: Save job name
+        if: ${{ always() }}
+        shell: bash
+        run: |
+          matrix_folders=${matrix_folders/'models_'/'models/'}
+          job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
+          echo "$job_name"
+          echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  run_tests_multi_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Update some packages
+        working-directory: /transformers
+        run: python3 -m pip install -U datasets
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Install
+        if: inputs.framework == 'pytorch'
+        working-directory: /transformers
+        run: |
+          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: Save job name
+        if: ${{ always() }}
+        shell: bash
+        run: |
+          matrix_folders=${matrix_folders/'models_'/'models/'}
+          job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
+          echo "$job_name"
+          echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  run_torch_cuda_extensions_gpu:
+    name: Torch CUDA extension tests
+    if: inputs.framework == 'pytorch'
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
+    needs: setup
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # Check out the commit selected by the `sha` input, as the other jobs of this workflow do,
+      # so that every job tests the same commit.
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ inputs.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Update some packages
+        working-directory: /transformers
+        run: python3 -m pip install -U datasets
+
+      - name: Install
+        working-directory: /transformers
+        run: |
+          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+      - name: Remove cached torch extensions
+        run: rm -rf /github/home/.cache/torch_extensions/
+
+      # To avoid unknown test failures
+      - name: Pre build DeepSpeed *again*
+        working-directory: /
+        run: |
+          python3 -m pip uninstall -y deepspeed
+          rm -rf DeepSpeed
+          git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
+          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
+          path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-22.04
+    if: always()
+    needs: [
+      setup,
+      run_tests_single_gpu,
+      run_tests_multi_gpu,
+      run_torch_cuda_extensions_gpu
+    ]
+    steps:
+      - name: Preliminary job status
+        shell: bash
+        # Print the result of the `setup` job so its outcome is visible in this job's log
+        run: |
+          echo "Setup status: ${{ needs.setup.result }}"
+
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v4
+
+      # Create a directory to store test failure tables in the next step
+      - name: Create directory
+        run: mkdir test_failure_tables
+
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+          CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
+          SETUP_STATUS: ${{ needs.setup.result }}
+        # We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` is required to change
+        # `models/bert` to `models_bert`, as the artifact names use `_` instead of `/`.
+        run: |
+          pip install slack_sdk
+          pip show slack_sdk
+          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
+
+      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
+      - name: Failure table artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
+          path: test_failure_tables
+
+      # Delete the per-job report artifacts (names starting with `single-` or `multi-`).
+      # They are uploaded with `actions/upload-artifact@v4`; `delete-artifact@v2` targets the older
+      # artifact backend, so use a release that is compatible with v4 artifacts.
+      # NOTE(review): newer delete-artifact releases delete through the API with a token; confirm
+      # the job's token is allowed to delete artifacts.
+      - uses: geekyeggo/delete-artifact@v5
+        with:
+          name: |
+              single-*
+              multi-*
--- a/.github/workflows/self-push-amd-mi300-caller.yml
+++ b/.github/workflows/self-push-amd-mi300-caller.yml
@ -1,25 +0,0 @@
-name: Self-hosted runner (AMD mi300 CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (push-caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi300
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi300
-    secrets: inherit
--- a/.github/workflows/self-push-amd.yml
+++ b/.github/workflows/self-push-amd.yml
@ -36,7 +36,7 @@ jobs:
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -57,31 +57,33 @@ jobs:
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
-    env:
-      # `CI_BRANCH_PUSH`: The branch name from the push event
-      # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
-      # `CI_SHA_PUSH`: The commit SHA from the push event
-      # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
+        # `CI_BRANCH_PUSH`: The branch name from the push event
+        # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
        # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
+        # `CI_SHA_PUSH`: The commit SHA from the push event
+        # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
        # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -156,16 +155,10 @@ jobs:
      matrix:
        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
        machine_type: [single-gpu, multi-gpu]
-    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci  # <--- We test only for PyTorch for now
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
@ -173,7 +166,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -233,7 +230,7 @@ jobs:
      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        run: |
-          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"
+          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}

      - name: Failure short reports
        if: ${{ failure() }}
@ -259,12 +256,6 @@ jobs:
 #        run_tests_torch_cuda_extensions_single_gpu,
 #        run_tests_torch_cuda_extensions_multi_gpu
    ]
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
@ -280,7 +271,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -329,7 +324,6 @@ jobs:
        # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        run: |
-          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@ -40,24 +40,26 @@ jobs:
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
-    env:
-      # `CI_BRANCH_PUSH`: The branch name from the push event
-      # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
-      # `CI_SHA_PUSH`: The commit SHA from the push event
-      # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
+        # `CI_BRANCH_PUSH`: The branch name from the push event
+        # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
        # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
+        # `CI_SHA_PUSH`: The commit SHA from the push event
+        # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
        # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -136,12 +135,6 @@ jobs:
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
@ -149,7 +142,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -231,12 +228,6 @@ jobs:
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
@ -244,7 +235,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -326,12 +321,6 @@ jobs:
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
@ -339,7 +328,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -418,12 +411,6 @@ jobs:
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
@ -431,7 +418,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -509,12 +500,6 @@ jobs:
        run_tests_torch_cuda_extensions_single_gpu,
        run_tests_torch_cuda_extensions_multi_gpu
    ]
-    env:
-      # For the meaning of these environment variables, see the job `Setup`
-      CI_BRANCH_PUSH: ${{ github.event.ref }}
-      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
-      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
-      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
@ -528,7 +513,14 @@ jobs:
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
+        # The event values are handed over through `env` instead of being interpolated into the script text, so that a
+        # crafted branch name cannot inject shell commands.
+        env:
+          CI_BRANCH_PUSH: ${{ github.event.ref }}
+          CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
+          CI_SHA_PUSH: ${{ github.event.head_commit.id }}
+          CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
@ -574,7 +563,6 @@ jobs:
        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        run: |
-          pip install huggingface_hub
-          pip install slack_sdk 
+          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
--- a/.github/workflows/self-scheduled-amd-mi210-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml
@ -10,46 +10,10 @@ on:
      - run_amd_scheduled_ci_caller*

 jobs:
-  model-ci:
-    name: Model CI
+  run_amd_ci:
+    name: AMD mi210
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
    uses: ./.github/workflows/self-scheduled-amd.yml
    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
+      gpu_flavor: mi210
    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@ -10,46 +10,10 @@ on:
      - run_amd_scheduled_ci_caller*

 jobs:
-  model-ci:
-    name: Model CI
+  run_amd_ci:
+    name: AMD mi250
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
    uses: ./.github/workflows/self-scheduled-amd.yml
    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
+      gpu_flavor: mi250
    secrets: inherit
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@ -3,23 +3,10 @@ name: Self-hosted runner (scheduled-amd)
 # Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the
 # CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes
 # us towards the limit of allowed jobs on GitHub Actions.
-
 on:
  workflow_call:
    inputs:
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-      ci_event:
+      gpu_flavor:
        required: true
        type: string

@ -31,7 +18,7 @@ env:
  RUN_SLOW: yes
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  NUM_SLICES: 2
+

 # Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running.
 # This is done so that we avoid parallelizing the scheduled tests, to leave available
@ -47,7 +34,7 @@ jobs:
          fetch-depth: 2

      - name: Check Runner Status
-        run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+        run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

  check_runners:
    name: Check Runners
@ -55,7 +42,7 @@ jobs:
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -63,29 +50,25 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-
      - name: ROCM-INFO
        run: |
          rocminfo  | grep "Agent" -A 14
-
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

  setup:
-    if: contains(fromJSON('["run_models_gpu"]'), inputs.job)
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
-      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
-      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Update clone
        working-directory: /transformers
@ -107,8 +90,7 @@ jobs:
        name: Identify models to test
        working-directory: /transformers/tests
        run: |
-          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+          echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT

      - name: ROCM-SMI
        run: |
@ -117,7 +99,6 @@ jobs:
      - name: ROCM-INFO
        run: |
          rocminfo  | grep "Agent" -A 14
-
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@ -127,38 +108,31 @@ jobs:
        run: |
          python3 utils/print_env.py

-  run_models_gpu:
-    if: ${{ inputs.job == 'run_models_gpu' }}
+  run_models_gpu_single_gpu:
    name: Single GPU tests
-    needs: setup
    strategy:
      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
      fail-fast: false
      matrix:
-        machine_type: [single-gpu, multi-gpu]
-        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-    uses: ./.github/workflows/model_jobs_amd.yml
-    with:
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      machine_type: ${{ matrix.machine_type }}
-      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
-      docker: ${{ inputs.docker }}
-    secrets: inherit
-
-  run_pipelines_torch_gpu:
-    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
-    name: PyTorch pipelines
-    needs: check_runners
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [single-gpu]
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-      image: ${{ inputs.docker }}
+      image: huggingface/transformers-pytorch-amd-gpu
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}
@ -170,11 +144,9 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-
      - name: ROCM-INFO
        run: |
          rocminfo  | grep "Agent" -A 14
-
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@ -188,35 +160,101 @@ jobs:
        working-directory: /transformers
        run: pip freeze

-      - name: Run all pipeline tests on GPU
+      - name: Run all tests on GPU
        working-directory: /transformers
-        run: |
-          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
-          name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
-          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+
+  run_models_gpu_multi_gpu:
+    name: Multi GPU tests
+    strategy:
+      max-parallel: 1
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [multi-gpu]
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+    container:
+      image: huggingface/transformers-pytorch-amd-gpu
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  run_examples_gpu:
-    if: ${{ inputs.job == 'run_examples_gpu' }}
-    name: Examples directory
-    needs: check_runners
+    name: Examples tests
    strategy:
      fail-fast: false
      matrix:
        machine_type: [single-gpu]
-    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-      image: ${{ inputs.docker }}
+      image: huggingface/transformers-pytorch-amd-gpu
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
    steps:
      - name: Update clone
        working-directory: /transformers
@ -229,11 +267,9 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-
      - name: ROCM-INFO
        run: |
          rocminfo  | grep "Agent" -A 14
-
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
@ -251,7 +287,7 @@ jobs:
        working-directory: /transformers
        run: |
          pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch

      - name: Failure short reports
        if: ${{ failure() }}
@ -265,17 +301,73 @@ jobs:
          name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports

-  run_torch_cuda_extensions_gpu:
-    if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
-    name: Torch ROCm deepspeed tests
-    needs: check_runners
+  run_pipelines_torch_gpu:
+    name: PyTorch pipelines tests
+    # Runs the tests under tests/pipelines once per `machine_type` listed in the matrix below.
    strategy:
      fail-fast: false
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
-      image: ${{ inputs.docker }}
+      image: huggingface/transformers-pytorch-amd-gpu
+      # Passes /dev/kfd and /dev/dri into the container and forwards ROCR_VISIBLE_DEVICES from the runner.
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      # /transformers is presumably the clone baked into the image (confirm against the Dockerfile);
+      # move it to the commit this workflow run was triggered for.
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      # The next three steps only print GPU / ROCm diagnostics to the job log.
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      # `-n 1 --dist=loadfile` are pytest-xdist options (a single worker here).
+      # `--make-reports` is presumably a project-specific pytest option (TODO confirm); the steps
+      # below read its output from /transformers/reports/<report id>.
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
+
+      # Only runs after a failed step; `continue-on-error` keeps a missing report file from
+      # adding a second failure on top of the real one.
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
+
+      # `always()` uploads the report directory whether the tests passed or failed.
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+
+  run_torch_cuda_extensions_gpu:
+    name: Torch ROCm deepspeed tests
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+
+    runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+    needs: setup
+    container:
+      image: huggingface/transformers-pytorch-deepspeed-amd-gpu
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
@ -289,7 +381,6 @@ jobs:
      - name: ROCM-SMI
        run: |
          rocm-smi
-
      - name: ROCM-INFO
        run: |
          rocminfo  | grep "Agent" -A 14
@ -309,7 +400,7 @@ jobs:

      - name: Run all tests on GPU
        working-directory: /transformers
-        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test"
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ failure() }}
@ -323,27 +414,106 @@ jobs:
          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

-  send_results:
-    name: Slack Report
+  run_extract_warnings:
+    name: Extract warnings in CI artifacts
+    runs-on: ubuntu-22.04
+    if: always()
    needs: [
      check_runner_status,
      check_runners,
      setup,
-      run_models_gpu,
-      run_pipelines_torch_gpu,
+      run_models_gpu_single_gpu,
+      run_models_gpu_multi_gpu,
      run_examples_gpu,
+      run_pipelines_torch_gpu,
      run_torch_cuda_extensions_gpu
    ]
-    if: ${{ always() }}
-    uses: ./.github/workflows/slack-report.yml
-    with:
-      job: ${{ inputs.job }}
-      # This would be `skipped` if `setup` is skipped.
-      setup_status: ${{ needs.setup.result }}
-      slack_report_channel: ${{ inputs.slack_report_channel }}
-      # This would be an empty string if `setup` is skipped.
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
-      ci_event: ${{ inputs.ci_event }}
+    steps:
+      - name: Checkout transformers
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2

-    secrets: inherit
+      - name: Install transformers
+        run: pip install transformers
+
+      - name: Show installed libraries and their versions
+        run: pip freeze
+
+      - name: Create output directory
+        run: mkdir warnings_in_ci
+
+      - uses: actions/download-artifact@v4
+        with:
+          path: warnings_in_ci
+
+      - name: Show artifacts
+        run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
+        working-directory: warnings_in_ci
+
+      - name: Extract warnings in CI artifacts
+        env:
+          # Hand the token over through the step environment and quote it at the call site, so
+          # the shell never word-splits or re-parses the secret's contents, and an empty secret
+          # cannot make `--token` swallow the following `--from_gh` flag.
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+        # Runs the extraction script against this workflow run, then prints the selected
+        # warnings (one per line) from the JSON file it is expected to write.
+        run: |
+          python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token "$ACCESS_REPO_INFO_TOKEN" --from_gh
+          echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
+
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: warnings_in_ci
+          path: warnings_in_ci/selected_warnings.json
+
+  send_results:
+    # Collects the outcome of every upstream job and runs the Slack notification script.
+    name: Send results to webhook
+    runs-on: ubuntu-22.04
+    # Run even when upstream jobs failed or were skipped, so a report is always produced.
+    if: always()
+    needs: [
+      check_runner_status,
+      check_runners,
+      setup,
+      run_models_gpu_single_gpu,
+      run_models_gpu_multi_gpu,
+      run_examples_gpu,
+      run_pipelines_torch_gpu,
+      run_torch_cuda_extensions_gpu,
+      run_extract_warnings
+    ]
+    steps:
+      - name: Preliminary job status
+        shell: bash
+        # For the meaning of these environment variables, see the job `Setup`
+        run: |
+          echo "Runner availability: ${{ needs.check_runner_status.result }}"
+          echo "Runner status: ${{ needs.check_runners.result }}"
+          echo "Setup status: ${{ needs.setup.result }}"
+
+      - uses: actions/checkout@v4
+      # No `name` input is given, so every artifact uploaded earlier in this run is downloaded.
+      - uses: actions/download-artifact@v4
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID_DAILY_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+          CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }}
+          CI_SHA: ${{ github.sha }}
+          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
+          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
+          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
+          SETUP_STATUS: ${{ needs.setup.result }}
+          # Carried through the environment instead of being template-expanded inside the script:
+          # the script below then receives the value verbatim, whatever quotes, `$` or
+          # backslashes it contains.
+          SETUP_MATRIX: ${{ needs.setup.outputs.matrix }}
+        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
+        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
+        run: |
+          sudo apt-get install -y curl
+          pip install slack_sdk
+          pip show slack_sdk
+          python utils/notification_service.py "$SETUP_MATRIX"
+
+      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
+      - name: Failure table artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: test_failure_tables
+          path: test_failure_tables
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@ -2,74 +2,18 @@ name: Self-hosted runner (scheduled)


 on:
+  repository_dispatch:
+  schedule:
+    - cron: "17 2 * * *"
  push:
    branches:
-      - run_scheduled_ci*
+      - check_quant

 jobs:
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-models"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-torch"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-gpu
-      ci_event: Daily CI
-    secrets: inherit
-
-  tf-pipeline:
-    name: TF pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_tf_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-tf"
-      runner: daily-ci
-      docker: huggingface/transformers-tensorflow-gpu
-      ci_event: Daily CI
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-examples"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-deepspeed"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Daily CI
-      working-directory-prefix: /workspace
-    secrets: inherit
-
  quantization-ci:
    name: Quantization CI
    uses: ./.github/workflows/self-scheduled.yml
    with:
      job: run_quantization_torch_gpu
      slack_report_channel: "#transformers-ci-daily-quantization"
-      runner: daily-ci
-      docker: huggingface/transformers-quantization-latest-gpu
-      ci_event: Daily CI
    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -15,19 +15,6 @@ on:
      slack_report_channel:
        required: true
        type: string
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      working-directory-prefix:
-        default: ''
-        required: false
-        type: string

 env:
  HF_HOME: /mnt/cache
@ -51,7 +38,7 @@ jobs:
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -83,7 +70,7 @@ jobs:
        run: |
          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-
+      
      - id: set-matrix-quantization
        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
        name: Identify quantization method to test
@ -109,8 +96,6 @@ jobs:
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
      machine_type: ${{ matrix.machine_type }}
      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
-      docker: ${{ inputs.docker }}
    secrets: inherit

  run_pipelines_torch_gpu:
@ -120,7 +105,7 @@ jobs:
      fail-fast: false
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-pytorch-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -170,7 +155,7 @@ jobs:
      fail-fast: false
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-tensorflow-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -221,7 +206,7 @@ jobs:
      fail-fast: false
      matrix:
        machine_type: [single-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -272,88 +257,69 @@ jobs:
      fail-fast: false
      matrix:
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
-      image: ${{ inputs.docker }}
+      image: huggingface/transformers-pytorch-deepspeed-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /workspace/transformers
        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /workspace/transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: |
-          python3 -m pip install -U datasets
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
      - name: Remove cached torch extensions
        run: rm -rf /github/home/.cache/torch_extensions/

      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again* (for daily CI)
-        if: ${{ contains(inputs.ci_event, 'Daily CI') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/
+      - name: Pre build DeepSpeed *again*
+        working-directory: /workspace
        run: |
          python3 -m pip uninstall -y deepspeed
          DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

-      # To avoid unknown test failures
-      - name: Pre build DeepSpeed *again* (for nightly & Past CI)
-        if: ${{ contains(inputs.ci_event, 'Nightly CI') || contains(inputs.ci_event, 'Past CI') }}
-        working-directory: ${{ inputs.working-directory-prefix }}/
-        run: |
-          python3 -m pip uninstall -y deepspeed
-          rm -rf DeepSpeed
-          git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
-          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
-
      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /workspace/transformers
        run: |
-          python3 utils/print_env.py
+          python utils/print_env.py

      - name: Show installed libraries and their versions
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /workspace/transformers
        run: pip freeze

      - name: Run all tests on GPU
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /workspace/transformers
        run: |
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+          python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
-        run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-          path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

  run_quantization_torch_gpu:
    if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
    name: " "
    needs: setup
    strategy:
-      max-parallel: 4
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
        machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-quantization-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -468,6 +434,5 @@ jobs:
      # This would be an empty string if `setup` is skipped.
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
-      ci_event: ${{ inputs.ci_event }}
-
+      
    secrets: inherit
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@ -18,12 +18,7 @@ on:
      quantization_matrix:
        required: true
        type: string
-      ci_event:
-        required: true
-        type: string

-env:
-  TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}

 jobs:
  send_results:
@ -48,7 +43,7 @@ jobs:
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: ${{ inputs.ci_event }}
+          CI_EVENT: scheduled
          CI_SHA: ${{ github.sha }}
          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
          CI_TEST_JOB: ${{ inputs.job }}
@ -59,17 +54,18 @@ jobs:
        # empty string, and the called script still gets one argument (which is the empty string).
        run: |
          sudo apt-get install -y curl
-          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ inputs.folder_slices }}"

      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
      - name: Failure table artifacts
+        # Only the model testing job is concerned for this step
+        if: ${{ inputs.job == 'run_models_gpu' }}
        uses: actions/upload-artifact@v4
        with:
-          name: ci_results_${{ inputs.job }}
-          path: ci_results_${{ inputs.job }}
+          name: prev_ci_results
+          path: prev_ci_results
      
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
@ -79,23 +75,13 @@ jobs:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
-          CI_EVENT: ${{ inputs.ci_event }}
+          CI_EVENT: scheduled
          CI_SHA: ${{ github.sha }}
-          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
        # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
        # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
        run: |
          sudo apt-get install -y curl
-          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" 
-
-      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
-      - name: Failure table artifacts
-        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ci_results_${{ inputs.job }}
-          path: ci_results_${{ inputs.job }}
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@ -9,11 +9,9 @@ on:
      docker_image:
        description: 'Name of the Docker image'
        required: true
-      num_gpus:
-        description: 'Type of the number of gpus to use (`single` or `multi`)'
-        required: true

 env:
+  IS_GITHUB_CI: "1"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache 
  TRANSFORMERS_IS_CI: yes 
@ -22,13 +20,12 @@ env:
  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. 
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} 
  TF_FORCE_GPU_ALLOW_GROWTH: true 
-  CUDA_VISIBLE_DEVICES: 0,1
  RUN_PT_TF_CROSS_TESTS: 1

 jobs:
  ssh_runner:
    name: "SSH"
-    runs-on: ["${{ github.event.inputs.num_gpus }}-gpu", nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
+    runs-on: [single-gpu, nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
    container:
      image: ${{ github.event.inputs.docker_image }}
      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -53,22 +50,11 @@ jobs:
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
-
-      - name: Store Slack infos
-        #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
-        shell: bash
-        run: |
-          if [ "${{ secrets[format('{0}_{1}', github.actor, 'SLACK_ID')] }}" != "" ]; then
-            echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', github.actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
-          else
-            echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
-          fi
-
+      
      - name: Tailscale # In order to be able to SSH when a test fails
-        uses: huggingface/tailscale-action@main
+        uses: huggingface/tailscale-action@v1
        with:
          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
-          slackChannel: ${{ env.SLACKCHANNEL }}
+          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
          waitForSSH: true
-          sshTimeout: 15m
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@ -9,15 +9,13 @@ jobs:
    name: Close Stale Issues
    if: github.repository == 'huggingface/transformers'
    runs-on: ubuntu-22.04
-    permissions:
-      issues: write
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
    - uses: actions/checkout@v4

    - name: Setup Python
-      uses: actions/setup-python@v5
+      uses: actions/setup-python@v4
      with:
        python-version: 3.8

--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@ -1,18 +0,0 @@
-on:
-  push:
-
-name: Secret Leaks
-
-permissions:
-  contents: read
-
-jobs:
-  trufflehog:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -61,10 +61,7 @@ feedback.
 The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.

 Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions.
-
-> [!TIP]
-> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫
+already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) first. This helps us respond quicker to fixing issues related to the library versus general questions.

 Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:

@ -132,7 +129,7 @@ You will need basic `git` proficiency to contribute to
 manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
 Git](https://git-scm.com/book/en/v2) is a very good reference.

-You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
+You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L426)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:

 1. Fork the [repository](https://github.com/huggingface/transformers) by
   clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
@ -163,7 +160,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main
   If 🤗 Transformers was already installed in the virtual environment, remove
   it with `pip uninstall transformers` before reinstalling it in editable
   mode with the `-e` flag.
-
+   
   Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
   failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
   (PyTorch, TensorFlow and/or Flax) then do:
@ -222,7 +219,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main

   If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
   make sure you install the documentation builder:
-
+   
   ```bash
   pip install ".[docs]"
   ```
@ -341,12 +338,12 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_ne
 RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
 ```

-Like the slow tests, there are other environment variables available which are not enabled by default during testing:
+Like the slow tests, there are other environment variables available which are not enabled by default during testing:
 - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
 - `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
 - `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

-More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
+More environment variables and additional information can be found in the [testing_utils.py](src/transformers/testing_utils.py).

 🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
 `pytest`-specific features in the test suite itself.
--- a/12
+++ b/12
@ -1,11 +1,11 @@
-.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples benchmark
+.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples

 # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
 export PYTHONPATH = src

 check_dirs := examples tests src utils

-exclude_folders :=  ""
+exclude_folders := examples/research_projects

 modified_only_fixup:
 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@ -53,14 +53,15 @@ quality:
 	@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
 	ruff check $(check_dirs) setup.py conftest.py
 	ruff format --check $(check_dirs) setup.py conftest.py
+	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
 	python utils/check_doc_toc.py
-	python utils/check_docstrings.py --check_all


 # Format source code automatically and check if there are any problems left that need manual fixing

 extra_style_checks:
+	python utils/custom_init_isort.py
 	python utils/sort_auto_mappings.py
 	python utils/check_doc_toc.py --fix_and_overwrite

@ -95,11 +96,6 @@ test:
 test-examples:
 	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/

-# Run benchmark
-
-benchmark:
-	python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=diff backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
-
 # Run tests for SageMaker DLC release

 test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
--- a/README.md
+++ b/README.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <b>English</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-	<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_de.md
+++ b/i18n/README_de.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
        <b>Deutsch</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_es.md
+++ b/i18n/README_es.md
@ -20,30 +20,39 @@ limitations under the License.
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
        <b>Español</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_fr.md
+++ b/i18n/README_fr.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Construction" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="Version GitHub" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Pacte des contributeurs" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
        <b>Français</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

@ -279,6 +288,7 @@ Suivez les pages d'installation de Flax, PyTorch ou TensorFlow pour voir comment

 Nombre actuel de points de contrôle : ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen)

+
 🤗 Transformers fournit actuellement les architectures suivantes: consultez [ici](https://huggingface.co/docs/transformers/model_summary) pour un résumé global de chacune d'entre elles.

 Pour vérifier si chaque modèle a une implémentation en Flax, PyTorch ou TensorFlow, ou s'il a un tokenizer associé pris en charge par la bibliothèque 🤗 Tokenizers, consultez [ce tableau](https://huggingface.co/docs/transformers/index#supported-frameworks).
--- a/i18n/README_hd.md
+++ b/i18n/README_hd.md
@ -45,30 +45,39 @@ checkpoint: जाँच बिंदु
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
        <b>हिन्दी</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_ja.md
+++ b/i18n/README_ja.md
@ -55,30 +55,39 @@ user: ユーザ
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
        <b>日本語</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_ko.md
+++ b/i18n/README_ko.md
@ -20,31 +20,39 @@ limitations under the License.
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
        <b>한국어</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
-
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_pt-br.md
+++ b/i18n/README_pt-br.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
        <b>Português</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_ru.md
+++ b/i18n/README_ru.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
        <b>Русский</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_te.md
+++ b/i18n/README_te.md
@ -26,11 +26,21 @@ limitations under the License.


 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

@ -38,19 +48,18 @@ limitations under the License.
 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
        <b>తెలుగు</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

@ -284,6 +293,7 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్‌స్టా

 🤗 ట్రాన్స్‌ఫార్మర్లు ప్రస్తుతం కింది ఆర్కిటెక్చర్‌లను అందజేస్తున్నాయి: వాటిలో ప్రతి ఒక్కటి ఉన్నత స్థాయి సారాంశం కోసం [ఇక్కడ](https://huggingface.co/docs/transformers/model_summary) చూడండి.

+
 ఈ అమలులు అనేక డేటాసెట్‌లలో పరీక్షించబడ్డాయి (ఉదాహరణ స్క్రిప్ట్‌లను చూడండి) మరియు అసలైన అమలుల పనితీరుతో సరిపోలాలి. మీరు [డాక్యుమెంటేషన్](https://github.com/huggingface/transformers/tree/main/examples) యొక్క ఉదాహరణల విభాగంలో పనితీరుపై మరిన్ని వివరాలను కనుగొనవచ్చు.

 ## ఇంకా నేర్చుకో
--- a/i18n/README_vi.md
+++ b/i18n/README_vi.md
@ -25,30 +25,39 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
        <b>Tiếng Việt</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
    </p>
 </h4>

--- a/i18n/README_zh-hans.md
+++ b/i18n/README_zh-hans.md
@ -45,11 +45,21 @@ checkpoint: 检查点
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

@ -57,18 +67,17 @@ checkpoint: 检查点
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
        <b>简体中文</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/i18n/README_zh-hant.md
+++ b/i18n/README_zh-hant.md
@ -57,30 +57,39 @@ user: 使用者
    <br>
 </p>
 <p align="center">
-    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
-    <a href="https://github.com/huggingface/transformers/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg"></a>
-    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md"><img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg"></a>
+    <a href="https://circleci.com/gh/huggingface/transformers">
+        <img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE">
+        <img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
+    <a href="https://github.com/huggingface/transformers/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/transformers.svg">
+    </a>
+    <a href="https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md">
+        <img alt="Contributor Covenant" src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg">
+    </a>
    <a href="https://zenodo.org/badge/latestdoi/155220641"><img src="https://zenodo.org/badge/155220641.svg" alt="DOI"></a>
 </p>

 <h4 align="center">
    <p>
        <a href="https://github.com/huggingface/transformers/">English</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
        <b>繁體中文</b> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
    </p>
 </h4>

--- a/SECURITY.md
+++ b/SECURITY.md
@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
 models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
 by the transformers library), as developed specifically to prevent arbitrary code execution on your system.

-To avoid loading models from unsafe formats(e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
+To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.

 ### Remote code

@ -36,4 +36,5 @@ Please inspect the code of the tools before passing them to the Agent to protect

 ## Reporting a Vulnerability

-Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
+🤗 Please feel free to submit vulnerability reports to our private bug bounty program at https://hackerone.com/hugging_face. You'll need to request access to the program by emailing security@huggingface.co.
+Note that you'll need to be invited to our program, so send us a quick email at security@huggingface.co if you've found a vulnerability.
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
@ -596,7 +596,7 @@ Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active

 ## [BentoML](https://github.com/bentoml/BentoML)

-[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models. 
+[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models. 
 All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.

 Keywords: BentoML, Framework, Deployment, AI Applications
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@ -1,326 +0,0 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Run benchmark using the `optimum-benchmark` library with some customization in `transformers`.
-
-Assume we are under `transformers` root directory: (make sure the commits are valid commits)
-```bash
-python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun
-```
-"""
-
-import argparse
-import glob
-import json
-import os.path
-import re
-import tempfile
-from contextlib import contextmanager
-from pathlib import Path
-
-from git import Repo
-
-from huggingface_hub import HfApi
-
-from optimum_benchmark import Benchmark
-from optimum_benchmark_wrapper import main
-
-
-PATH_TO_REPO = Path(__file__).parent.parent.resolve()
-
-
-@contextmanager
-def checkout_commit(repo: Repo, commit_id: str):
-    """
-    Context manager that checks out a given commit when entered, but gets back to the reference it was at on exit.
-    Args:
-        repo (`git.Repo`): A git repository (for instance the Transformers repo).
-        commit_id (`str`): The commit reference to checkout inside the context manager.
-    """
-    current_head = repo.head.commit if repo.head.is_detached else repo.head.ref
-
-    try:
-        repo.git.checkout(commit_id)
-        yield
-
-    finally:
-        repo.git.checkout(current_head)
-
-
-def summarize(run_dir, metrics, expand_metrics=False):
-    """Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`.
-
-    Each summary's format is as follows (for `expand_metrics=False`):
-    ```
-    {
-        "model": "google/gemma-2b",
-        "commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7",
-        "config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5",
-        "metrics": {
-            "decode.latency.mean": 1.624666809082031,
-            "per_token.latency.mean": 0.012843788806628804,
-            "per_token.throughput.value": 77.85864553330948
-        }
-    }
-    ```
-    """
-    reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True)
-    report_dirs = [str(Path(report).parent) for report in reports]
-
-    summaries = []
-    for report_dir in report_dirs:
-        commit = re.search(r"/commit=([^/]+)", report_dir).groups()[0]
-
-        if not os.path.isfile(os.path.join(report_dir, "benchmark.json")):
-            continue
-        benchmark = Benchmark.from_json(os.path.join(report_dir, "benchmark.json"))
-        report = benchmark.report
-
-        model = benchmark.config.backend["model"]
-
-        # Ths looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
-        # (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
-        benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
-        benchmark_name = str(Path(benchmark_name).parts[-1])
-        if benchmark_name.startswith("commit="):
-            benchmark_name = benchmark.config.name
-
-        metrics_values = {}
-        # post-processing of report: show a few selected/important metric
-        for metric in metrics:
-            keys = metric.split(".")
-            value = report.to_dict()
-            current = metrics_values
-            for key in keys:
-                # Avoid KeyError when a user's specified metric has typo.
-                # TODO: Give warnings.
-                if key not in value:
-                    continue
-                value = value[key]
-
-                if expand_metrics:
-                    if isinstance(value, dict):
-                        if key not in current:
-                            current[key] = {}
-                            current = current[key]
-                    else:
-                        current[key] = value
-
-            if not expand_metrics:
-                metrics_values[metric] = value
-
-        # show some config information
-        print(f"model: {model}")
-        print(f"commit: {commit}")
-        print(f"config: {benchmark_name}")
-        if len(metrics_values) > 0:
-            print("metrics:")
-            if expand_metrics:
-                print(metrics_values)
-            else:
-                for metric, value in metrics_values.items():
-                    print(f"  - {metric}: {value}")
-        print("-" * 80)
-
-        summary = {
-            "model": model,
-            "commit": commit,
-            "config": benchmark_name,
-            "metrics": metrics_values,
-        }
-        summaries.append(summary)
-
-        with open(os.path.join(report_dir, "summary.json"), "w") as fp:
-            json.dump(summary, fp, indent=4)
-
-    return summaries
-
-
-def combine_summaries(summaries):
-    """Combine a list of summary obtained from the function `summarize`.
-
-    The combined summary's format is as follows:
-    ```
-    "google/gemma-2b": {
-        "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": {
-            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
-                "metrics": {"decode.latency.mean": 1.624666809082031}
-            },
-            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
-                "metrics": {"decode.latency.mean": 1.6278163452148438}
-            }
-        },
-        "benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": {
-            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
-                "metrics": {"decode.latency.mean": 1.6947791748046876}
-            },
-            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
-                "metrics": {
-                    "decode.latency.mean": 1.6980519409179688}
-            }
-        }
-    }
-    ```
-    """
-    combined = {}
-    for summary in summaries:
-        model = summary["model"]
-        config = summary["config"]
-        commit = summary["commit"]
-
-        if model not in combined:
-            combined[model] = {}
-
-        if config not in combined[model]:
-            combined[model][config] = {}
-
-        if commit not in combined[model][config]:
-            combined[model][config][commit] = {"metrics": summary["metrics"]}
-
-    with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp:
-        json.dump(combined, fp, indent=4)
-
-    print(json.dumps(combined, indent=4))
-
-    return combined
-
-
-if __name__ == "__main__":
-
-    def list_str(values):
-        return values.split(",")
-
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
-    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
-
-    # arguments specific to this wrapper for our own customization
-    parser.add_argument("--ensure_empty", type=bool, default=True, help="If to create a temporary directory.")
-    parser.add_argument(
-        "--commit",
-        type=list_str,
-        default="",
-        help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
-    )
-    parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
-
-    parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.")
-    parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.")
-    parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).")
-
-    args, optimum_benchmark_args = parser.parse_known_args()
-
-    repo = Repo(PATH_TO_REPO)
-
-    metrics = [
-        "prefill.latency.mean",
-        "prefill.throughput.value",
-        "decode.latency.mean",
-        "decode.throughput.value",
-        "per_token.latency.mean",
-        "per_token.throughput.value",
-    ]
-    if args.metrics is not None:
-        metrics = args.metrics.split(",")
-
-    # Get `backend.model` in a hacky way: We want to control the experiment flow manually.
-    models = [""]
-    for idx, arg in enumerate(optimum_benchmark_args):
-        if arg.startswith("backend.model="):
-            models = arg[len("backend.model=") :]
-            models = models.split(",")
-            break
-    optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")]
-
-    # Get the commit(s)
-    current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref)
-    commits = [x for x in args.commit if x != ""]
-    if len(commits) == 0:
-        commits = [current_head]
-    elif len(commits) == 1 and commits[0] == "diff":
-        # compare to `main`
-        commits = ["main", current_head]
-
-    # Get the specified run directory
-    run_dir_arg_idx, run_dir = -1, None
-    sweep_dir_arg_idx, sweep_dir = -1, None
-    for idx, arg in enumerate(optimum_benchmark_args):
-        if arg.startswith("hydra.run.dir="):
-            run_dir = arg[len("hydra.run.dir=") :]
-            run_dir_arg_idx = idx
-        elif arg.startswith("hydra.sweep.dir="):
-            sweep_dir = arg[len("hydra.sweep.dir=") :]
-            sweep_dir_arg_idx = idx
-    exp_run_dir, arg_dix, arg_name = (
-        (sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir")
-        if "--multirun" in optimum_benchmark_args
-        else (run_dir, run_dir_arg_idx, "hydra.run.dir")
-    )
-
-    # TODO: not hardcoded
-    if exp_run_dir is None and args.ensure_empty:
-        exp_run_dir = "_benchmark"
-
-    if args.ensure_empty:
-        os.makedirs(exp_run_dir, exist_ok=True)
-        exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir)
-
-    run_summaries = []
-    for commit in commits:
-        with checkout_commit(repo, commit):
-            commit = str(repo.head.commit)
-
-            commit_run_dir = exp_run_dir
-            if exp_run_dir is not None:
-                commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}")
-
-            print(f"Run benchmark on commit: {commit}")
-
-            for model in models:
-                model_arg = [f"backend.model={model}"] if model != "" else []
-                dir_args = []
-                if commit_run_dir is not None:
-                    if arg_dix > -1:
-                        optimum_benchmark_args[arg_dix] = f"{arg_name}={commit_run_dir}"
-                    else:
-                        dir_args = [
-                            f"hydra.sweep.dir={commit_run_dir}",
-                            f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}",
-                        ]
-                main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args)
-
-            if commit_run_dir is not None:
-                # Need to remove the `\` character
-                summaries = summarize(commit_run_dir.replace("\\", ""), metrics)
-                run_summaries.extend(summaries)
-
-    # aggregate the information across the commits
-    if exp_run_dir is not None:
-        with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp:
-            json.dump(run_summaries, fp, indent=4)
-
-        combined_summary = combine_summaries(run_summaries)
-
-        if args.repo_id is not None and args.path_in_repo is not None:
-            # Upload to Hub
-            api = HfApi()
-            api.upload_folder(
-                folder_path=exp_run_dir,
-                path_in_repo=args.path_in_repo,
-                repo_id=args.repo_id,
-                repo_type="dataset",
-                token=args.token,
-            )
--- a/benchmark/config/generation.yaml
+++ b/benchmark/config/generation.yaml
@ -1,57 +0,0 @@
-defaults:
-  - benchmark # inheriting benchmark schema
-  - scenario: inference
-  - launcher: process
-  - backend: pytorch
-  - _self_ # for hydra 1.1 compatibility
-
-name: pytorch_generate
-
-launcher:
-  start_method: spawn
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cuda
-  device_ids: 0
-  no_weights: true
-  model: meta-llama/Llama-2-7b-hf
-  cache_implementation: static
-  torch_compile: true
-  torch_dtype: float16
-  torch_compile_config:
-    backend: inductor
-    mode: reduce-overhead
-    fullgraph: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
-    sequence_length: 7
-  generate_kwargs:
-    max_new_tokens: 128
-    min_new_tokens: 128
-    do_sample: false
-  memory: true
-  latency: true
-  iterations: 2
-  duration: 0
-
-
-# hydra/cli specific settings
-hydra:
-  run:
-    # where to store run results
-    dir: runs/${name}
-  job:
-    # change working directory to the run directory
-    chdir: true
-    env_set:
-      # set environment variable OVERRIDE_BENCHMARKS to 1
-      # to not skip benchmarks that have been run before
-      OVERRIDE_BENCHMARKS: 1
-      LOG_LEVEL: WARN
-  sweep:
-    dir: multirun
-    subdir: ${hydra.job.override_dirname}
--- a/benchmark/optimum_benchmark_wrapper.py
+++ b/benchmark/optimum_benchmark_wrapper.py
@ -1,16 +0,0 @@
-import argparse
-import subprocess
-
-
-def main(config_dir, config_name, args):
-    subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
-    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
-    args, unknown = parser.parse_known_args()
-
-    main(args.config_dir, args.config_name, unknown)
--- a/conftest.py
+++ b/conftest.py
@ -53,7 +53,7 @@ NOT_DEVICE_TESTS = {
    "test_torch_save_load",
    "test_initialization",
    "test_forward_signature",
-    "test_model_get_set_embeddings",
+    "test_model_common_attributes",
    "test_model_main_input_name",
    "test_correct_missing_keys",
    "test_tie_model_weights",
@ -71,7 +71,7 @@ NOT_DEVICE_TESTS = {
    "ModelTester::test_pipeline_",
    "/repo_utils/",
    "/utils/",
-    "/agents/",
+    "/tools/",
 }

 # allow having multiple repository checkouts and not needing to remember to rerun
@ -94,7 +94,7 @@ def pytest_configure(config):
    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
    config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
    config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
-    config.addinivalue_line("markers", "agent_tests: mark the agent tests that are run on their specific schedule")
+    config.addinivalue_line("markers", "tool_tests: mark the tool tests that are run on their specific schedule")
    config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")


--- a/docker/consistency.dockerfile
+++ b/docker/consistency.dockerfile
@ -1,16 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-USER root
-ARG REF=main
-RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
-RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
-# tensorflow pin matching setup.py
-RUN uv pip install --no-cache-dir pypi-kenlm
-RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
-RUN git lfs install
-
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/custom-tokenizers.dockerfile
+++ b/docker/custom-tokenizers.dockerfile
@ -1,26 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-
-RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
-RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
-RUN mkdir jumanpp-2.0.0-rc3/bld
-WORKDIR ./jumanpp-2.0.0-rc3/bld
-RUN wget -LO catch.hpp https://github.com/catchorg/Catch2/releases/download/v2.13.8/catch.hpp
-RUN mv catch.hpp ../libs/
-RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
-RUN make install -j 10
-
-
-RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
-RUN uv pip install  --no-cache-dir "transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
-# spacy is not used so not tested. Causes to failures. TODO fix later
-RUN python3 -m unidic download
-RUN pip uninstall -y transformers
-
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN apt remove -y g++ cmake  xz-utils libprotobuf-dev protobuf-compiler
--- a/docker/examples-tf.dockerfile
+++ b/docker/examples-tf.dockerfile
@ -1,12 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
-RUN apt-get install -y g++ cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv
-RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval
-RUN pip install  --upgrade --no-cache-dir "transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3" 
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/examples-torch.dockerfile
+++ b/docker/examples-torch.dockerfile
@ -1,11 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
-RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/exotic-models.dockerfile
+++ b/docker/exotic-models.dockerfile
@ -1,17 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir  --no-deps timm accelerate
-RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
-# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
-RUN pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose'  'dataset'
-# RUN git clone https://github.com/facebookresearch/detectron2.git
-# RUN python3 -m pip install --no-cache-dir -e detectron2
-RUN pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3'
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/jax-light.dockerfile
+++ b/docker/jax-light.dockerfile
@ -1,10 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/pipeline-tf.dockerfile
+++ b/docker/pipeline-tf.dockerfile
@ -1,10 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3" tensorflow_probability
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/pipeline-torch.dockerfile
+++ b/docker/pipeline-torch.dockerfile
@ -1,11 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
-RUN pip uninstall -y transformers
--- a/docker/quality.dockerfile
+++ b/docker/quality.dockerfile
@ -1,9 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update && apt-get install -y time git 
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv &&  uv venv
-RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3
-RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/tf-light.dockerfile
+++ b/docker/tf-light.dockerfile
@ -1,12 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client git
-RUN apt-get install -y  cmake
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3" 
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/torch-jax-light.dockerfile
+++ b/docker/torch-jax-light.dockerfile
@ -1,16 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update &&  apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-deps accelerate
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"
-
-
-# RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]"
-
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/torch-light.dockerfile
+++ b/docker/torch-light.dockerfile
@ -1,11 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]"
-RUN pip uninstall -y transformers
--- a/docker/torch-tf-light.dockerfile
+++ b/docker/torch-tf-light.dockerfile
@ -1,19 +0,0 @@
-FROM python:3.10-slim
-ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
-RUN echo ${REF}
-USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
-ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
-RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN git lfs install
-
-RUN uv pip install --no-cache-dir pypi-kenlm
-RUN pip install --no-cache-dir  "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3" librosa
-
-
-RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
@ -9,11 +9,11 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).

-ARG PYTORCH='2.4.0'
+ARG PYTORCH='2.2.1'
 # (not always a valid torch version)
-ARG INTEL_TORCH_EXT='2.3.0'
+ARG INTEL_TORCH_EXT='2.2.0'
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu121'
+ARG CUDA='cu118'

 RUN apt update
 RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
@ -48,13 +48,6 @@ RUN python3 -m pip install --no-cache-dir decord av==9.2.0
 # Some slow tests require bnb
 RUN python3 -m pip install --no-cache-dir bitsandbytes

-# Some tests require quanto
-RUN python3 -m pip install --no-cache-dir quanto
-
-# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
-# (`deformable_detr`, `rwkv`, `mra`)
-RUN python3 -m pip uninstall -y ninja
-
 # For `dinat` model
 # The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
 RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels
--- a/docker/transformers-pytorch-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-amd-gpu/Dockerfile
@ -1,19 +1,24 @@
-FROM rocm/dev-ubuntu-22.04:6.0.2
+FROM rocm/dev-ubuntu-20.04:5.6
 # rocm/pytorch has no version with 2.1.0
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

+ARG PYTORCH='2.1.0'
+ARG TORCH_VISION='0.16.0'
+ARG TORCH_AUDIO='2.1.0'
+ARG ROCM='5.6'
+
 RUN apt update && \
-    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg && \
+    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip ffmpeg && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*

-RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
+RUN python3 -m pip install --no-cache-dir --upgrade pip

-RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
+RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM

-RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
+RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

 ARG REF=main
 WORKDIR /
@ -30,5 +35,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop

-# Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
-RUN python3 -m pip uninstall py3nvml pynvml apex -y
+# Remove nvml as it is not compatible with ROCm
+RUN python3 -m pip uninstall py3nvml pynvml -y
--- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
@ -22,7 +22,7 @@ RUN apt update && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*

-RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic>=2.0.0"
+RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic<2"
 RUN python3 -m pip uninstall -y apex torch torchvision torchaudio
 RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir

--- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@ -49,5 +49,5 @@ RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed -
 RUN cd transformers && python3 setup.py develop

 # The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails
-RUN python3 -m pip install -U --no-cache-dir "pydantic>=2.0.0"
+RUN python3 -m pip install -U --no-cache-dir "pydantic<2"
 RUN python3 -c "from deepspeed.launcher.runner import main"
--- a/docker/transformers-pytorch-gpu/Dockerfile
+++ b/docker/transformers-pytorch-gpu/Dockerfile
@ -11,7 +11,7 @@ ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

 # If set to nothing, will install the latest version
-ARG PYTORCH='2.4.0'
+ARG PYTORCH='2.1.1'
 ARG TORCH_VISION=''
 ARG TORCH_AUDIO=''
 # Example: `cu102`, `cu113`, etc.
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@ -45,12 +45,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt
 # Add aqlm for quantization testing
 RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2

-# Add hqq for quantization testing
-RUN python3 -m pip install --no-cache-dir hqq
-
-# For GGUF tests
-RUN python3 -m pip install --no-cache-dir gguf
-
 # Add autoawq for quantization testing
 # >=v0.2.3 needed for compatibility with torch 2.2.1
 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
@ -63,4 +57,4 @@ RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git

 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
-RUN cd transformers && python3 setup.py develop
+RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-tensorflow-gpu/Dockerfile
+++ b/docker/transformers-tensorflow-gpu/Dockerfile
@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
--- a/docs/TRANSLATING.md
+++ b/docs/TRANSLATING.md
@ -54,4 +54,4 @@ The fields you should add are `local` (with the name of the file containing the

 Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter.

-> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu.
+> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu and @MKhalusova.
--- a/docs/source/de/installation.md
+++ b/docs/source/de/installation.md
@ -162,7 +162,7 @@ Transformers verwendet die Shell-Umgebungsvariablen `PYTORCH_TRANSFORMERS_CACHE`

 ## Offline Modus

-Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `HF_HUB_OFFLINE=1`, um dieses Verhalten zu aktivieren.
+Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `TRANSFORMERS_OFFLINE=1`, um dieses Verhalten zu aktivieren.

 <Tip>

@ -179,7 +179,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
 Führen Sie das gleiche Programm in einer Offline-Instanz mit aus:

 ```bash
-HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
+HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
 python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
 ```

--- a/docs/source/de/peft.md
+++ b/docs/source/de/peft.md
@ -86,10 +86,10 @@ model.load_adapter(peft_model_id)
 Die `bitsandbytes`-Integration unterstützt Datentypen mit 8bit und 4bit Genauigkeit, was für das Laden großer Modelle nützlich ist, weil es Speicher spart (lesen Sie den `bitsandbytes`-Integrations [guide](./quantization#bitsandbytes-integration), um mehr zu erfahren). Fügen Sie die Parameter `load_in_8bit` oder `load_in_4bit` zu [`~PreTrainedModel.from_pretrained`] hinzu und setzen Sie `device_map="auto"`, um das Modell effektiv auf Ihre Hardware zu verteilen:

 ```py
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer

 peft_model_id = "ybelkada/opt-350m-lora"
-model = AutoModelForCausalLM.from_pretrained(peft_model_id, quantization_config=BitsAndBytesConfig(load_in_8bit=True))
+model = AutoModelForCausalLM.from_pretrained(peft_model_id, device_map="auto", load_in_8bit=True)
 ```

 ## Einen neuen Adapter hinzufügen
--- a/docs/source/de/run_scripts.md
+++ b/docs/source/de/run_scripts.md
@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.

 # Trainieren mit einem Skript

-Neben den 🤗 Transformers [notebooks](./notebooks) gibt es auch Beispielskripte, die zeigen, wie man ein Modell für eine Aufgabe mit [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch), [TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow) oder [JAX/Flax](https://github.com/huggingface/transformers/tree/main/examples/flax) trainiert.
+Neben den 🤗 Transformers [notebooks](./notebooks) gibt es auch Beispielskripte, die zeigen, wie man ein Modell für eine Aufgabe mit [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch), [TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow) oder [JAX/Flax](https://github.com/huggingface/transformers/tree/main/examples/flax) trainiert.

 Sie werden auch Skripte finden, die wir in unseren [Forschungsprojekten](https://github.com/huggingface/transformers/tree/main/examples/research_projects) und [Legacy-Beispielen](https://github.com/huggingface/transformers/tree/main/examples/legacy) verwendet haben und die größtenteils von der Community stammen. Diese Skripte werden nicht aktiv gepflegt und erfordern eine bestimmte Version von 🤗 Transformers, die höchstwahrscheinlich nicht mit der neuesten Version der Bibliothek kompatibel ist.

--- a/docs/source/de/testing.md
+++ b/docs/source/de/testing.md
@ -185,16 +185,16 @@ pytest -k "test and ada" tests/test_optimization.py

 Manchmal müssen Sie `accelerate` Tests für Ihre Modelle ausführen. Dazu fügen Sie einfach `-m accelerate_tests` zu Ihrem Befehl hinzu, wenn Sie diese Tests bei einem `OPT`-Lauf ausführen möchten:
 ```bash
-RUN_SLOW=1 pytest -m accelerate_tests tests/models/opt/test_modeling_opt.py
+RUN_SLOW=1 pytest -m accelerate_tests tests/models/opt/test_modeling_opt.py
 ```


-### Dokumentationstests ausführen
+### Dokumentationstests ausführen

-Um zu testen, ob die Dokumentationsbeispiele korrekt sind, sollten Sie überprüfen, ob die `doctests` erfolgreich sind.
-Lassen Sie uns als Beispiel den docstring von [WhisperModel.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1017-L1035) verwenden:
+Um zu testen, ob die Dokumentationsbeispiele korrekt sind, sollten Sie überprüfen, ob die `doctests` erfolgreich sind.
+Lassen Sie uns als Beispiel den docstring von [WhisperModel.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/modeling_whisper.py#L1017-L1035) verwenden:

-```python
+```python
 r"""
 Returns:

@ -217,8 +217,8 @@ Example:

 ```

-Führen Sie einfach die folgende Zeile aus, um automatisch jedes docstring-Beispiel in der gewünschten Datei zu testen:
-```bash
+Führen Sie einfach die folgende Zeile aus, um automatisch jedes docstring-Beispiel in der gewünschten Datei zu testen:
+```bash
 pytest --doctest-modules <path_to_file_or_dir>
 ```
 Wenn die Datei eine Markdown-Erweiterung hat, sollten Sie das Argument `--doctest-glob="*.md"` hinzufügen.
@ -862,7 +862,7 @@ Code, der fehlerhaft ist, einen schlechten Zustand verursacht, der sich auf ande
 - Hier sehen Sie, wie Sie einen ganzen Test bedingungslos überspringen können:

 ```python no-style
-@unittest.skip(reason="this bug needs to be fixed")
+@unittest.skip("this bug needs to be fixed")
 def test_feature_x():
 ```

--- a/docs/source/en/_redirects.yml
+++ b/docs/source/en/_redirects.yml
@ -1,5 +1,3 @@
 # Optimizing inference

 perf_infer_gpu_many: perf_infer_gpu_one
-transformers_agents: agents
-quantization: quantization/overview
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@ -23,14 +23,10 @@
    title: Load and train adapters with 🤗 PEFT
  - local: model_sharing
    title: Share your model
-  - local: agents
-    title: Agents 101
-  - local: agents_advanced
-    title: Agents, supercharged - Multi-agents, External tools, and more
+  - local: transformers_agents
+    title: Agents
  - local: llm_tutorial
    title: Generation with LLMs
-  - local: conversations
-    title: Chatting with Transformers
  title: Tutorials
 - sections:
  - isExpanded: false
@ -94,17 +90,11 @@
      title: Visual Question Answering
    - local: tasks/text-to-speech
      title: Text to speech
-    - local: tasks/image_text_to_text
-      title: Image-text-to-text
-    - local: tasks/video_text_to_text
-      title: Video-text-to-text
    title: Multimodal
  - isExpanded: false
    sections:
    - local: generation_strategies
      title: Customize the generation strategy
-    - local: kv_cache
-      title: Best Practices for Generation with Cache
    title: Generation
  - isExpanded: false
    sections:
@ -124,7 +114,7 @@
  - local: custom_models
    title: Share a custom model
  - local: chat_templating
-    title: Chat templates
+    title: Templates for chat models
  - local: trainer
    title: Trainer
  - local: sagemaker
@ -141,44 +131,20 @@
    title: Notebooks with examples
  - local: community
    title: Community resources
+  - local: custom_tools
+    title: Custom Tools and Prompts
  - local: troubleshooting
    title: Troubleshoot
-  - local: gguf
-    title: Interoperability with GGUF files
-  - local: tiktoken
-    title: Interoperability with TikToken files
-  title: Developer guides
- sections:
-  - local: quantization/overview
-    title: Getting started
-  - local: quantization/bitsandbytes
-    title: bitsandbytes
-  - local: quantization/gptq
-    title: GPTQ
-  - local: quantization/awq
-    title: AWQ
-  - local: quantization/aqlm
-    title: AQLM
-  - local: quantization/quanto
-    title: Quanto
-  - local: quantization/eetq
-    title: EETQ
-  - local: quantization/hqq
-    title: HQQ
-  - local: quantization/fbgemm_fp8
-    title: FBGEMM_FP8
-  - local: quantization/optimum
-    title: Optimum
-  - local: quantization/torchao
-    title: TorchAO
-  - local: quantization/contribute
+  - local: hf_quantizer
    title: Contribute new quantization method
-  title: Quantization Methods
+  title: Developer guides
 - sections:
  - local: performance
    title: Overview
  - local: llm_optims
    title: LLM inference optimization
+  - local: quantization
+    title: Quantization
  - sections:
    - local: perf_train_gpu_one
      title: Methods and tools for efficient training on a single GPU
@ -296,8 +262,6 @@
      title: Trainer
    - local: main_classes/deepspeed
      title: DeepSpeed
-    - local: main_classes/executorch
-      title: ExecuTorch
    - local: main_classes/feature_extractor
      title: Feature Extractor
    - local: main_classes/image_processor
@ -380,8 +344,6 @@
        title: ESM
      - local: model_doc/falcon
        title: Falcon
-      - local: model_doc/falcon_mamba
-        title: FalconMamba
      - local: model_doc/fastspeech2_conformer
        title: FastSpeech2Conformer
      - local: model_doc/flan-t5
@ -400,8 +362,6 @@
        title: Fuyu
      - local: model_doc/gemma
        title: Gemma
-      - local: model_doc/gemma2
-        title: Gemma2
      - local: model_doc/openai-gpt
        title: GPT
      - local: model_doc/gpt_neo
@ -420,16 +380,12 @@
        title: GPTSAN Japanese
      - local: model_doc/gpt-sw3
        title: GPTSw3
-      - local: model_doc/granite
-        title: Granite
      - local: model_doc/herbert
        title: HerBERT
      - local: model_doc/ibert
        title: I-BERT
      - local: model_doc/jamba
        title: Jamba
-      - local: model_doc/jetmoe
-        title: JetMoe
      - local: model_doc/jukebox
        title: Jukebox
      - local: model_doc/led
@ -452,8 +408,6 @@
        title: MADLAD-400
      - local: model_doc/mamba
        title: Mamba
-      - local: model_doc/mamba2
-        title: mamba2
      - local: model_doc/marian
        title: MarianMT
      - local: model_doc/markuplm
@ -484,8 +438,6 @@
        title: MT5
      - local: model_doc/mvp
        title: MVP
-      - local: model_doc/nemotron
-        title: Nemotron
      - local: model_doc/nezha
        title: NEZHA
      - local: model_doc/nllb
@ -496,8 +448,6 @@
        title: Nyströmformer
      - local: model_doc/olmo
        title: OLMo
-      - local: model_doc/olmoe
-        title: OLMoE
      - local: model_doc/open-llama
        title: Open-Llama
      - local: model_doc/opt
@ -522,12 +472,8 @@
        title: QDQBert
      - local: model_doc/qwen2
        title: Qwen2
-      - local: model_doc/qwen2_audio
-        title: Qwen2Audio
      - local: model_doc/qwen2_moe
        title: Qwen2MoE
-      - local: model_doc/qwen2_vl
-        title: Qwen2VL
      - local: model_doc/rag
        title: RAG
      - local: model_doc/realm
@ -611,8 +557,6 @@
        title: DeiT
      - local: model_doc/depth_anything
        title: Depth Anything
-      - local: model_doc/depth_anything_v2
-        title: Depth Anything V2
      - local: model_doc/deta
        title: DETA
      - local: model_doc/detr
@ -633,8 +577,6 @@
        title: FocalNet
      - local: model_doc/glpn
        title: GLPN
-      - local: model_doc/hiera
-        title: Hiera
      - local: model_doc/imagegpt
        title: ImageGPT
      - local: model_doc/levit
@ -663,8 +605,6 @@
        title: RegNet
      - local: model_doc/resnet
        title: ResNet
-      - local: model_doc/rt_detr
-        title: RT-DETR
      - local: model_doc/segformer
        title: SegFormer
      - local: model_doc/seggpt
@ -699,8 +639,6 @@
        title: ViTMSN
      - local: model_doc/yolos
        title: YOLOS
-      - local: model_doc/zoedepth
-        title: ZoeDepth
      title: Vision models
    - isExpanded: false
      sections:
@ -710,12 +648,8 @@
        title: Bark
      - local: model_doc/clap
        title: CLAP
-      - local: model_doc/dac
-        title: dac
      - local: model_doc/encodec
        title: EnCodec
-      - local: model_doc/hiera
-        title: Hiera
      - local: model_doc/hubert
        title: Hubert
      - local: model_doc/mctct
@ -790,8 +724,6 @@
        title: BridgeTower
      - local: model_doc/bros
        title: BROS
-      - local: model_doc/chameleon
-        title: Chameleon
      - local: model_doc/chinese_clip
        title: Chinese-CLIP
      - local: model_doc/clip
@ -820,8 +752,6 @@
        title: Idefics2
      - local: model_doc/instructblip
        title: InstructBLIP
-      - local: model_doc/instructblipvideo
-        title: InstructBlipVideo
      - local: model_doc/kosmos-2
        title: KOSMOS-2
      - local: model_doc/layoutlm
@ -838,10 +768,6 @@
        title: Llava
      - local: model_doc/llava_next
        title: LLaVA-NeXT
-      - local: model_doc/llava_next_video
-        title: LLaVa-NeXT-Video
-      - local: model_doc/llava_onevision
-        title: LLaVA-Onevision
      - local: model_doc/lxmert
        title: LXMERT
      - local: model_doc/matcha
@ -856,8 +782,6 @@
        title: OWL-ViT
      - local: model_doc/owlv2
        title: OWLv2
-      - local: model_doc/paligemma
-        title: PaliGemma
      - local: model_doc/perceiver
        title: Perceiver
      - local: model_doc/pix2struct
@ -878,8 +802,6 @@
        title: TVP
      - local: model_doc/udop
        title: UDOP
-      - local: model_doc/video_llava
-        title: VideoLlava
      - local: model_doc/vilt
        title: ViLT
      - local: model_doc/vipllava
--- a/docs/source/en/accelerate.md
+++ b/docs/source/en/accelerate.md
@ -46,7 +46,7 @@ The next step is to pass all the relevant training objects to the [`~accelerate.

 ## Backward

-The last addition is to replace the typical `loss.backward()` in your training loop with 🤗 Accelerate's [`~accelerate.Accelerator.backward`] method:
+The last addition is to replace the typical `loss.backward()` in your training loop with 🤗 Accelerate's [`~accelerate.Accelerator.backward`] method:

 ```py
 >>> for epoch in range(num_epochs):
--- a/docs/source/en/agents.md
+++ b/docs/source/en/agents.md
@ -1,457 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-# Agents and tools
-
-[[open-in-colab]]
-
-### What is an agent?
-
-Large Language Models (LLMs) trained to perform [causal language modeling](./tasks/language_modeling.) can tackle a wide range of tasks, but they often struggle with basic tasks like logic, calculation, and search. When prompted in domains in which they do not perform well, they often fail to generate the answer we expect them to.
-
-One approach to overcome this weakness is to create an *agent*.
-
-An agent is a system that uses an LLM as its engine, and it has access to functions called *tools*.
-
-These *tools* are functions for performing a task, and they contain all necessary description for the agent to properly use them.
-
-The agent can be programmed to:
- devise a series of actions/tools and run them all at once,  like the [`CodeAgent`]
- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one, like the [`ReactJsonAgent`]
-
-### Types of agents
-
-#### Code agent
-
-This agent has a planning step, then generates python code to execute all its actions at once. It natively handles different input and output types for its tools, thus it is the recommended choice for multimodal tasks.
-
-#### React agents
-
-This is the go-to agent to solve reasoning tasks, since the ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) makes it really efficient to think on the basis of its previous observations.
-
-We implement two versions of ReactJsonAgent: 
- [`ReactJsonAgent`] generates tool calls as a JSON in its output.
- [`ReactCodeAgent`] is a new type of ReactJsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance.
-
-> [!TIP]
-> Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about ReAct agents.
-
-<div class="flex justify-center">
-    <img
-        class="block dark:hidden"
-        src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif"
-    />
-    <img
-        class="hidden dark:block"
-        src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Agent_ManimCE.gif"
-    />
-</div>
-
-![Framework of a React Agent](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png)
-
-For example, here is how a ReAct Code agent would work its way through the following question.
-
-```py3
->>> agent.run(
-...     "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
-... )
-=====New task=====
-How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
-====Agent is executing the code below:
-bert_blocks = search(query="number of blocks in BERT base encoder")
-print("BERT blocks:", bert_blocks)
-====
-Print outputs:
-BERT blocks: twelve encoder blocks
-
-====Agent is executing the code below:
-attention_layer = search(query="number of layers in Attention is All You Need")
-print("Attention layers:", attention_layer)
-====
-Print outputs:
-Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
-
-====Agent is executing the code below:
-bert_blocks = 12
-attention_layers = 6
-diff = bert_blocks - attention_layers
-print("Difference in blocks:", diff)
-final_answer(diff)
-====
-
-Print outputs:
-Difference in blocks: 6
-
-Final answer: 6
-```
-
-### How can I build an agent?
-
-To initialize an agent, you need these arguments:
-
- an LLM to power your agent - the agent is not exactly the LLM, it’s more like the agent is a program that uses an LLM as its engine.
- a system prompt: what the LLM engine will be prompted with to generate its output
- a toolbox from which the agent pick tools to execute
- a parser to extract from the LLM output which tools are to call and with which arguments
-
-Upon initialization of the agent system, the tool attributes are used to generate a tool description, then baked into the agent’s `system_prompt` to let it know which tools it can use and why.
-
-To start with, please install the `agents` extras in order to install all default dependencies.
-
-```bash
-pip install transformers[agents]
-```
-
-Build your LLM engine by defining a `llm_engine` method which accepts a list of [messages](./chat_templating.) and returns text. This callable also needs to accept a `stop` argument that indicates when to stop generating.
-
-```python
-from huggingface_hub import login, InferenceClient
-
-login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
-
-client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
-
-def llm_engine(messages, stop_sequences=["Task"]) -> str:
-    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
-    answer = response.choices[0].message.content
-    return answer
-```
-
-You could use any `llm_engine` method as long as:
-1. it follows the [messages format](./chat_templating.md) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
-2. it stops generating outputs at the sequences passed in the argument `stop_sequences`
-
-Additionally, `llm_engine` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to llm_engine, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.
-
-You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.
-
-Now you can create an agent, like [`CodeAgent`], and run it. You can also create a [`TransformersEngine`] with a pre-initialized pipeline to run inference on your local machine using `transformers`.
-For convenience, since agentic behaviours generally require stronger models such as `Llama-3.1-70B-Instruct` that are harder to run locally for now, we also provide the [`HfApiEngine`] class that initializes a `huggingface_hub.InferenceClient` under the hood. 
-
-```python
-from transformers import CodeAgent, HfApiEngine
-
-llm_engine = HfApiEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
-agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
-
-agent.run(
-    "Could you translate this sentence from French, say it out loud and return the audio.",
-    sentence="Où est la boulangerie la plus proche?",
-)
-```
-
-This will be handy in case of emergency baguette need!
-You can even leave the argument `llm_engine` undefined, and an [`HfApiEngine`] will be created by default.
-
-```python
-from transformers import CodeAgent
-
-agent = CodeAgent(tools=[], add_base_tools=True)
-
-agent.run(
-    "Could you translate this sentence from French, say it out loud and give me the audio.",
-    sentence="Où est la boulangerie la plus proche?",
-)
-```
-
-Note that we used an additional `sentence` argument: you can pass text as additional arguments to the model.
-
-You can also use this to indicate the path to local or remote files for the model to use:
-
-```py
-from transformers import ReactCodeAgent
-
-agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
-
-agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
-```
-
-
-The prompt and output parser were automatically defined, but you can easily inspect them by calling the `system_prompt_template` on your agent.
-
-```python
-print(agent.system_prompt_template)
-```
-
-It's important to explain as clearly as possible the task you want to perform.
-Every [`~Agent.run`] operation is independent, and since an agent is powered by an LLM, minor variations in your prompt might yield completely different results.
-You can also run an agent consecutively for different tasks: each time the attributes `agent.task` and `agent.logs` will be re-initialized.
-
-
-#### Code execution
-
-A Python interpreter executes the code on a set of inputs passed along with your tools.
-This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and the print function, so you're already limited in what can be executed.
-
-The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue.
-You can still authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`ReactCodeAgent`] or [`CodeAgent`]:
-
-```py
->>> from transformers import ReactCodeAgent
-
->>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
->>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
-
-(...)
-'Hugging Face – Blog'
-```
-
-The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent.
-
-> [!WARNING]
-> The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports!
-
-### The system prompt
-
-An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [`ReactCodeAgent`] (below version is slightly simplified).
-
-```text
-You will be given a task to solve as best you can.
-You have access to the following tools:
-<<tool_descriptions>>
-
-To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
-
-At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
-Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence.
-During each intermediate step, you can use 'print()' to save whatever important information you will then need.
-These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
-
-In the end you have to return a final answer using the `final_answer` tool.
-
-Here are a few examples using notional tools:
---
-{examples}
-
-Above example were using notional tools that might not exist for you. You only have acces to those tools:
-<<tool_names>>
-You also can perform computations in the python code you generate.
-
-Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```<end_code>' sequence. You MUST provide at least the 'Code:' sequence to move forward.
-
-Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
-Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
-
-Remember to make sure that variables you use are all defined.
-
-Now Begin!
-```
-
-The system prompt includes:
- An *introduction* that explains how the agent should behave and what tools are.
- A description of all the tools that is defined by a `<<tool_descriptions>>` token that is dynamically replaced at runtime with the tools defined/chosen by the user.
-    - The tool description comes from the tool attributes, `name`, `description`, `inputs` and `output_type`,  and a simple `jinja2` template that you can refine.
- The expected output format.
-
-You could improve the system prompt, for example, by adding an explanation of the output format.
-
-For maximum flexibility, you can overwrite the whole system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter.
-
-```python
-from transformers import ReactJsonAgent
-from transformers.agents import PythonInterpreterTool
-
-agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
-```
-
-> [!WARNING]
-> Please make sure to define the `<<tool_descriptions>>` string somewhere in the `template` so the agent is aware 
-of the available tools.
-
-
-### Inspecting an agent run
-
-Here are a few useful attributes to inspect what happened after a run:
- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that then is appended to `agent.logs`.
- Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages. This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task in separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what has happened - but not every log will be transcribed by this method.
-
-## Tools
-
-A tool is an atomic function to be used by an agent.
-
-You can for instance check the [`PythonInterpreterTool`]: it has a name, a description, input descriptions, an output type, and a `__call__` method to perform the action.
-
-When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why.
-
-### Default toolbox
-
-Transformers comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`:
-
- **Document question answering**: given a document (such as a PDF) in image format, answer a question on this document ([Donut](./model_doc/donut))
- **Image question answering**: given an image, answer a question on this image ([VILT](./model_doc/vilt))
- **Speech to text**: given an audio recording of a person talking, transcribe the speech into text ([Whisper](./model_doc/whisper))
- **Text to speech**: convert text to speech ([SpeechT5](./model_doc/speecht5))
- **Translation**: translates a given sentence from source language to target language.
- **DuckDuckGo search**: performs a web search using the DuckDuckGo search engine.
- **Python code interpreter**: runs your LLM generated Python code in a secure environment. This tool will only be added to [`ReactJsonAgent`] if you initialize it with `add_base_tools=True`, since code-based agents can already natively execute Python code.
-
-
-You can manually use a tool by calling the [`load_tool`] function and a task to perform.
-
-
-```python
-from transformers import load_tool
-
-tool = load_tool("text-to-speech")
-audio = tool("This is a text to speech tool")
-```
-
-
-### Create a new tool
-
-You can create your own tool for use cases not covered by the default tools from Hugging Face.
-For example, let's create a tool that returns the most downloaded model for a given task from the Hub.
-
-You'll start with the code below.
-
-```python
-from huggingface_hub import list_models
-
-task = "text-classification"
-
-model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
-print(model.id)
-```
-
-This code can be converted into a class that inherits from the [`Tool`] superclass.
-
-
-The custom tool needs:
- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`.
- An attribute `description` is used to populate the agent's system prompt.
- An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input.
- An `output_type` attribute, which specifies the output type.
- A `forward` method which contains the inference code to be executed.
-
-
-```python
-from transformers import Tool
-from huggingface_hub import list_models
-
-class HFModelDownloadsTool(Tool):
-    name = "model_download_counter"
-    description = (
-        "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
-        "It returns the name of the checkpoint."
-    )
-
-    inputs = {
-        "task": {
-            "type": "text",
-            "description": "the task category (such as text-classification, depth-estimation, etc)",
-        }
-    }
-    output_type = "text"
-
-    def forward(self, task: str):
-        model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
-        return model.id
-```
-
-Now that the custom `HFModelDownloadsTool` class is ready, you can save it to a file named `model_downloads.py` and import it for use.
-
-
-```python
-from model_downloads import HFModelDownloadsTool
-
-tool = HFModelDownloadsTool()
-```
-
-You can also share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with write access.
-
-```python
-tool.push_to_hub("{your_username}/hf-model-downloads")
-```
-
-Load the tool with the [`~Tool.load_tool`] function and pass it to the `tools` parameter in your agent.
-
-```python
-from transformers import load_tool, CodeAgent
-
-model_download_tool = load_tool("m-ric/hf-model-downloads")
-agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
-agent.run(
-    "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
-)
-```
-
-You get the following:
-```text
-======== New task ========
-Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?
-==== Agent is executing the code below:
-most_downloaded_model = model_download_counter(task="text-to-video")
-print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.")
-====
-```
-
-And the output:
-`"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning."`
-
-
-### Manage your agent's toolbox
-
-If you have already initialized an agent, it is inconvenient to reinitialize it from scratch with a tool you want to use. With Transformers, you can manage an agent's toolbox by adding or replacing a tool.
-
-Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox.
-
-```python
-from transformers import CodeAgent
-
-agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
-agent.toolbox.add_tool(model_download_tool)
-```
-Now we can leverage both the new tool and the previous text-to-speech tool:
-
-```python
-agent.run(
-    "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
-)
-```
-
-
-| **Audio**                                                                                                                                            |
-|------------------------------------------------------------------------------------------------------------------------------------------------------|
-| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
-
-
-> [!WARNING]
-> Beware when adding tools to an agent that already works well: doing so can bias tool selection towards your new tool, or lead the agent to select a tool other than the one it previously used.
-
-
-Use the `agent.toolbox.update_tool()` method to replace an existing tool in the agent's toolbox.
-This is useful if your new tool is a one-to-one replacement of the existing tool because the agent already knows how to perform that specific task.
-Just make sure the new tool follows the same API as the replaced tool or adapt the system prompt template to ensure all examples using the replaced tool are updated.
-
-
-### Use a collection of tools
-
-You can leverage tool collections by using the ToolCollection object, with the slug of the collection you want to use.
-Then pass them as a list to initialize your agent, and start using them!
-
-```py
-from transformers import ToolCollection, ReactCodeAgent
-
-image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
-agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
-
-agent.run("Please draw me a picture of rivers and lakes.")
-```
-
-To speed up the start, tools are loaded only if called by the agent.
-
-This gets you this image:
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png">
--- a/docs/source/en/agents_advanced.md
+++ b/docs/source/en/agents_advanced.md
@ -1,182 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-# Agents, supercharged - Multi-agents, External tools, and more
-
-[[open-in-colab]]
-
-### What is an agent?
-
-> [!TIP]
-> If you're new to `transformers.agents`, make sure to first read the main [agents documentation](./agents).
-
-In this page we're going to highlight several advanced uses of `transformers.agents`.
-
-## Multi-agents
-
-Multi-agent has been introduced in Microsoft's framework [Autogen](https://huggingface.co/papers/2308.08155).
-It simply means having several agents working together to solve your task instead of only one.
-It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows to achieve efficient specialization.
-
-You can easily build hierarchical multi-agent systems with `transformers.agents`.
-
-To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools.
-
-Here's an example of making an agent that manages a specific web search agent using our [`DuckDuckGoSearchTool`]:
-
-```py
-from transformers.agents import ReactCodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
-
-llm_engine = HfApiEngine()
-
-web_agent = ReactCodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine)
-
-managed_web_agent = ManagedAgent(
-    agent=web_agent,
-    name="web_search",
-    description="Runs web searches for you. Give it your query as an argument."
-)
-
-manager_agent = ReactCodeAgent(
-    tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent]
-)
-
-manager_agent.run("Who is the CEO of Hugging Face?")
-```
-
-> [!TIP]
-> For an in-depth example of an efficient multi-agent implementation, see [how we pushed our multi-agent system to the top of the GAIA leaderboard](https://huggingface.co/blog/beating-gaia).
-
-
-## Use tools from gradio or LangChain
-
-### Use gradio-tools
-
-[gradio-tools](https://github.com/freddyaboulton/gradio-tools) is a powerful library that allows using Hugging
-Face Spaces as tools. It supports many existing Spaces as well as custom Spaces.
-
-Transformers supports `gradio_tools` with the [`Tool.from_gradio`] method. For example, let's use the [`StableDiffusionPromptGeneratorTool`](https://github.com/freddyaboulton/gradio-tools/blob/main/gradio_tools/tools/prompt_generator.py) from `gradio-tools` toolkit for improving prompts to generate better images.
-
-Import and instantiate the tool, then pass it to the `Tool.from_gradio` method:
-
-```python
-from gradio_tools import StableDiffusionPromptGeneratorTool
-from transformers import Tool, load_tool, CodeAgent
-
-gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
-prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
-```
-
-Now you can use it just like any other tool. For example, let's improve the prompt  `a rabbit wearing a space suit`.
-
-```python
-image_generation_tool = load_tool('huggingface-tools/text-to-image')
-agent = CodeAgent(tools=[prompt_generator_tool, image_generation_tool], llm_engine=llm_engine)
-
-agent.run(
-    "Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit'
-)
-```
-
-The model adequately leverages the tool:
-```text
-======== New task ========
-Improve this prompt, then generate an image of it.
-You have been provided with these initial arguments: {'prompt': 'A rabbit wearing a space suit'}.
-==== Agent is executing the code below:
-improved_prompt = StableDiffusionPromptGenerator(query=prompt)
-while improved_prompt == "QUEUE_FULL":
-    improved_prompt = StableDiffusionPromptGenerator(query=prompt)
-print(f"The improved prompt is {improved_prompt}.")
-image = image_generator(prompt=improved_prompt)
-====
-```
-
-Before finally generating the image:
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png">
-
-
-> [!WARNING]
-> gradio-tools require *textual* inputs and outputs even when working with different modalities like image and audio objects. Image and audio inputs and outputs are currently incompatible.
-
-### Use LangChain tools
-
-We love LangChain and think it has a very compelling suite of tools.
-To import a tool from LangChain, use the `from_langchain()` method.
-
-Here is how you can use it to recreate the intro's search result using a LangChain web search tool.
-
-```python
-from langchain.agents import load_tools
-from transformers import Tool, ReactCodeAgent
-
-search_tool = Tool.from_langchain(load_tools(["serpapi"])[0])
-
-agent = ReactCodeAgent(tools=[search_tool])
-
-agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?")
-```
-
-## Display your agent run in a cool Gradio interface
-
-You can leverage `gradio.Chatbot` to display your agent's thoughts using `stream_to_gradio`. Here is an example:
-
-```py
-import gradio as gr
-from transformers import (
-    load_tool,
-    ReactCodeAgent,
-    HfApiEngine,
-    stream_to_gradio,
-)
-
-# Import tool from Hub
-image_generation_tool = load_tool("m-ric/text-to-image")
-
-llm_engine = HfApiEngine("meta-llama/Meta-Llama-3-70B-Instruct")
-
-# Initialize the agent with the image generation tool
-agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
-
-
-def interact_with_agent(task):
-    messages = []
-    messages.append(gr.ChatMessage(role="user", content=task))
-    yield messages
-    for msg in stream_to_gradio(agent, task):
-        messages.append(msg)
-        yield messages + [
-            gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
-        ]
-    yield messages
-
-
-with gr.Blocks() as demo:
-    text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
-    submit = gr.Button("Run illustrator agent!")
-    chatbot = gr.Chatbot(
-        label="Agent",
-        type="messages",
-        avatar_images=(
-            None,
-            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
-        ),
-    )
-    submit.click(interact_with_agent, [text_input], [chatbot])
-
-if __name__ == "__main__":
-    demo.launch()
-```
--- a/docs/source/en/autoclass_tutorial.md
+++ b/docs/source/en/autoclass_tutorial.md
@ -110,7 +110,7 @@ Now you can access the `feature_maps` object from the first stage of the backbon

 ## AutoFeatureExtractor

-For audio tasks, a feature extractor processes the audio signal into the correct input format.
+For audio tasks, a feature extractor processes the audio signal into the correct input format.

 Load a feature extractor with [`AutoFeatureExtractor.from_pretrained`]:

--- a/docs/source/en/benchmarks.md
+++ b/docs/source/en/benchmarks.md
@ -35,7 +35,7 @@ The classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] allow to flexibly b

 <Tip>

-Here, _inference_ is defined by a single forward pass, and _training_ is defined by a single forward pass and
+Hereby, _inference_ is defined by a single forward pass, and _training_ is defined by a single forward pass and
 backward pass.

 </Tip>
@ -368,7 +368,7 @@ This section lists a couple of best practices one should be aware of when benchm
  memory measurement it is recommended to run each memory benchmark in a separate process by making sure
  `no_multi_processing` is set to `True`.
 - One should always state the environment information when sharing the results of a model benchmark. Results can vary
-  heavily between different GPU devices, library versions, etc., as a consequence, benchmark results on their own are not very
+  heavily between different GPU devices, library versions, etc., so that benchmark results on their own are not very
  useful for the community.


--- a/docs/source/en/bertology.md
+++ b/docs/source/en/bertology.md
@ -37,5 +37,5 @@ help people access the inner representations, mainly adapted from the great work
 - retrieving heads output values and gradients to be able to compute head importance score and prune head as explained
  in https://arxiv.org/abs/1905.10650.

-To help you understand and use these features, we have added a specific example script: [bertology.py](https://github.com/huggingface/transformers/tree/main/examples/research_projects/bertology/run_bertology.py) which extracts information and prune a model pre-trained on
+To help you understand and use these features, we have added a specific example script: [bertology.py](https://github.com/huggingface/transformers/tree/main/examples/research_projects/bertology/run_bertology.py) which extracts information and prunes a model pre-trained on
 GLUE.
--- a/docs/source/en/chat_templating.md
+++ b/docs/source/en/chat_templating.md
@ -14,7 +14,7 @@ rendered properly in your Markdown viewer.

 -->

-# Chat Templates
+# Templates for Chat Models

 ## Introduction

@ -26,7 +26,26 @@ Much like tokenization, different models expect very different input formats for
 **chat templates** as a feature. Chat templates are part of the tokenizer. They specify how to convert conversations, 
 represented as lists of messages, into a single tokenizable string in the format that the model expects. 

-Let's make this concrete with a quick example using the `mistralai/Mistral-7B-Instruct-v0.1` model:
+Let's make this concrete with a quick example using the `BlenderBot` model. BlenderBot has an extremely simple default 
+template, which mostly just adds whitespace between rounds of dialogue:
+
+```python
+>>> from transformers import AutoTokenizer
+>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
+
+>>> chat = [
+...    {"role": "user", "content": "Hello, how are you?"},
+...    {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
+...    {"role": "user", "content": "I'd like to show off how chat templating works!"},
+... ]
+
+>>> tokenizer.apply_chat_template(chat, tokenize=False)
+" Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>"
+```
+
+Notice how the entire chat is condensed into a single string. If we use `tokenize=True`, which is the default setting,
+that string will also be tokenized for us. To see a more complex template in action, though, let's use the 
+`mistralai/Mistral-7B-Instruct-v0.1` model.

 ```python
 >>> from transformers import AutoTokenizer
@ -42,26 +61,8 @@ Let's make this concrete with a quick example using the `mistralai/Mistral-7B-In
 "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
 ```

-Notice how the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of 
-user messages (but not assistant messages!), and the entire chat is condensed into a single string. 
-If we use `tokenize=True`, which is the default setting, that string will also be tokenized for us.
-
-Now, try the same code, but swap in the `HuggingFaceH4/zephyr-7b-beta` model instead, and you should get:
-
-```text
-<|user|>
-Hello, how are you?</s>
-<|assistant|>
-I'm doing great. How can I help you today?</s>
-<|user|>
-I'd like to show off how chat templating works!</s>
-```
-
-Both Zephyr and Mistral-Instruct were fine-tuned from the same base model, `Mistral-7B-v0.1`. However, they were trained
-with totally different chat formats. Without chat templates, you would have to write manual formatting code for each
-model, and it's very easy to make minor errors that hurt performance! Chat templates handle the details of formatting 
-for you, allowing you to write universal code that works for any model.
-
+Note that this time, the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of 
+user messages (but not assistant messages!). Mistral-instruct was trained with these tokens, but BlenderBot was not.

 ## How do I use chat templates?

@ -70,7 +71,7 @@ and `content` keys, and then pass it to the [`~PreTrainedTokenizer.apply_chat_te
 you'll get output that's ready to go! When using chat templates as input for model generation, it's also a good idea
 to use `add_generation_prompt=True` to add a [generation prompt](#what-are-generation-prompts). 

-Here's an example of preparing input for `model.generate()`, using `Zephyr` again:
+Here's an example of preparing input for `model.generate()`, using the `Zephyr` assistant model:

 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
@ -159,7 +160,7 @@ messages = [
 ]
 ```

-Here's what this will look like without a generation prompt, for a model that uses standard "ChatML" formatting:
+Here's what this will look like without a generation prompt, using the ChatML template we saw in the Zephyr example:

 ```python
 tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
@ -192,51 +193,13 @@ message. Remember, chat models are still just language models - they're trained
 special kind of text to them! You need to guide them with appropriate control tokens, so they know what they're 
 supposed to be doing.

-Not all models require generation prompts. Some models, like LLaMA, don't have any
+Not all models require generation prompts. Some models, like BlenderBot and LLaMA, don't have any
 special tokens before bot responses. In these cases, the `add_generation_prompt` argument will have no effect. The exact
 effect that `add_generation_prompt` has will depend on the template being used.

-## What does "continue_final_message" do?
-
-When passing a list of messages to `apply_chat_template` or `TextGenerationPipeline`, you can choose
-to format the chat so the model will continue the final message in the chat instead of starting a new one. This is done
-by removing any end-of-sequence tokens that indicate the end of the final message, so that the model will simply
-extend the final message when it begins to generate text. This is useful for "prefilling" the model's response. 
-
-Here's an example:
-
-```python
-chat = [
-    {"role": "user", "content": "Can you format the answer in JSON?"},
-    {"role": "assistant", "content": '{"name": "'},
-]
-
-formatted_chat = tokenizer.apply_chat_template(chat, tokenize=True, return_dict=True, continue_final_message=True)
-model.generate(**formatted_chat)
-```
-
-The model will generate text that continues the JSON string, rather than starting a new message. This approach
-can be very useful for improving the accuracy of the model's instruction-following when you know how you want
-it to start its replies.
-
-Because `add_generation_prompt` adds the tokens that start a new message, and `continue_final_message` removes any
-end-of-message tokens from the final message, it does not make sense to use them together. As a result, you'll
-get an error if you try!
-
-<Tip>
-
-The default behaviour of `TextGenerationPipeline` is to set `add_generation_prompt=True` so that it starts a new
-message. However, if the final message in the input chat has the "assistant" role, it will assume that this message is 
-a prefill and switch to `continue_final_message=True` instead, because most models do not support multiple 
-consecutive assistant messages. You can override this behaviour by explicitly passing the `continue_final_message` 
-argument when calling the pipeline.
-
-</Tip>
-
 ## Can I use chat templates in training?

-Yes! This is a good way to ensure that the chat template matches the tokens the model sees during training.
-We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you
+Yes! We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you
 can simply continue like any other language model training task. When training, you should usually set 
 `add_generation_prompt=False`, because the added tokens to prompt an assistant response will not be helpful during 
 training. Let's see an example:
@ -270,382 +233,37 @@ The sun.</s>

 From here, just continue training like you would with a standard language modelling task, using the `formatted_chat` column.

-<Tip>
-
-By default, some tokenizers add special tokens like `<bos>` and `<eos>` to text they tokenize. Chat templates should 
-already include all the special tokens they need, and so additional special tokens will often be incorrect or 
-duplicated, which will hurt model performance.
-
-Therefore, if you format text with `apply_chat_template(tokenize=False)`, you should set the argument
-`add_special_tokens=False` when you tokenize that text later. If you use `apply_chat_template(tokenize=True)`, you don't need to worry about this!
-
-</Tip>
-
-## Advanced: Extra inputs to chat templates
-
-The only argument that `apply_chat_template` requires is `messages`. However, you can pass any keyword
-argument to `apply_chat_template` and it will be accessible inside the template. This gives you a lot of freedom to use
-chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass
-strings, lists, dicts or whatever else you want. 
-
-That said, there are some common use-cases for these extra arguments,
-such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases,
-we have some opinionated recommendations about what the names and formats of these arguments should be, which are
-described in the sections below. We encourage model authors to make their chat templates compatible with this format,
-to make it easy to transfer tool-calling code between models.
-
-## Advanced: Tool use / function calling
-
-"Tool use" LLMs can choose to call functions as external tools before generating an answer. When passing tools
-to a tool-use model, you can simply pass a list of functions to the `tools` argument:
-
-```python
-from datetime import datetime
-
-def current_time():
-    """Get the current local time as a string."""
-    return str(datetime.now())
-
-def multiply(a: float, b: float):
-    """
-    A function that multiplies two numbers
-    
-    Args:
-        a: The first number to multiply
-        b: The second number to multiply
-    """
-    return a * b
-
-tools = [current_time, multiply]
-
-model_input = tokenizer.apply_chat_template(
-    messages,
-    tools=tools
-)
-```
-
-In order for this to work correctly, you should write your functions in the format above, so that they can be parsed
-correctly as tools. Specifically, you should follow these rules:
-
- The function should have a descriptive name
- Every argument must have a type hint
- The function must have a docstring in the standard Google style (in other words, an initial function description  
-  followed by an `Args:` block that describes the arguments, unless the function does not have any arguments).
- Do not include types in the `Args:` block. In other words, write `a: The first number to multiply`, not
-  `a (int): The first number to multiply`. Type hints should go in the function header instead.
- The function can have a return type and a `Returns:` block in the docstring. However, these are optional
-  because most tool-use models ignore them.
-
-### Passing tool results to the model
-
-The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use
-one? If that happens, you should:
-
-1. Parse the model's output to get the tool name(s) and arguments.
-2. Add the model's tool call(s) to the conversation.
-3. Call the corresponding function(s) with those arguments.
-4. Add the result(s) to the conversation.
-
-### A complete tool use example
-
-Let's walk through a tool use example, step by step. For this example, we will use an 8B `Hermes-2-Pro` model,
-as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the
-memory, you can consider using a larger model instead like [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
-or [Mixtral-8x22B](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1), both of which also support tool use
-and offer even stronger performance.
-
-First, let's load our model and tokenizer:
-
-```python
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-checkpoint = "NousResearch/Hermes-2-Pro-Llama-3-8B"
-
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto")
-```
-
-Next, let's define a list of tools:
-
-```python
-def get_current_temperature(location: str, unit: str) -> float:
-    """
-    Get the current temperature at a location.
-    
-    Args:
-        location: The location to get the temperature for, in the format "City, Country"
-        unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])
-    Returns:
-        The current temperature at the specified location in the specified units, as a float.
-    """
-    return 22.  # A real function should probably actually get the temperature!
-
-def get_current_wind_speed(location: str) -> float:
-    """
-    Get the current wind speed in km/h at a given location.
-    
-    Args:
-        location: The location to get the wind speed for, in the format "City, Country"
-    Returns:
-        The current wind speed at the given location in km/h, as a float.
-    """
-    return 6.  # A real function should probably actually get the wind speed!
-
-tools = [get_current_temperature, get_current_wind_speed]
-```
-
-Now, let's set up a conversation for our bot:
-
-```python
-messages = [
-  {"role": "system", "content": "You are a bot that responds to weather queries. You should reply with the unit used in the queried location."},
-  {"role": "user", "content": "Hey, what's the temperature in Paris right now?"}
-]
-```
-
-Now, let's apply the chat template and generate a response:
-
-```python
-inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
-inputs = {k: v.to(model.device) for k, v in inputs.items()}
-out = model.generate(**inputs, max_new_tokens=128)
-print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
-```
-
-And we get:
-
-```text
-<tool_call>
-{"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"}
-</tool_call><|im_end|>
-```
-
-The model has called the function with valid arguments, in the format requested by the function docstring. It has
-inferred that we're most likely referring to the Paris in France, and it remembered that, as the home of SI units,
-the temperature in France should certainly be displayed in Celsius.
-
-<Tip>
-
-The output format above is specific to the `Hermes-2-Pro` model we're using in this example. Other models may emit different
-tool call formats, and you may need to do some manual parsing at this step. For example, `Llama-3.1` models will emit
-slightly different JSON, with `parameters` instead of `arguments`. Regardless of the format the model outputs, you 
-should add the tool call to the conversation in the format below, with `tool_calls`, `function` and `arguments` keys. 
-
-</Tip>
-
-Next, let's append the model's tool call to the conversation.
-
-```python
-tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
-messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
-```
-
-<Tip warning={true}>
-
-If you're familiar with the OpenAI API, you should pay attention to an important difference here - the `tool_call` is
-a dict, but in the OpenAI API it's a JSON string. Passing a string may cause errors or strange model behaviour!
-
-</Tip>
-
-Now that we've added the tool call to the conversation, we can call the function and append the result to the
-conversation. Since we're just using a dummy function for this example that always returns 22.0, we can just append 
-that result directly.
-
-```python
-messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
-```
-
-<Tip>
-
-Some model architectures, notably Mistral/Mixtral, also require a `tool_call_id` here, which should be
-9 randomly-generated alphanumeric characters, and assigned to the `id` key of the tool call
-dictionary. The same key should also be assigned to the `tool_call_id` key of the tool response dictionary below, so 
-that tool calls can be matched to tool responses. So, for Mistral/Mixtral models, the code above would be:
-
-```python
-tool_call_id = "9Ae3bDc2F"  # Random ID, 9 alphanumeric characters
-tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
-messages.append({"role": "assistant", "tool_calls": [{"type": "function", "id": tool_call_id, "function": tool_call}]})
-```
-
-and
-
-```python
-messages.append({"role": "tool", "tool_call_id": tool_call_id, "name": "get_current_temperature", "content": "22.0"})
-```
-
-</Tip>
-
-Finally, let's let the assistant read the function outputs and continue chatting with the user:
-
-```python
-inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
-inputs = {k: v.to(model.device) for k, v in inputs.items()}
-out = model.generate(**inputs, max_new_tokens=128)
-print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
-```
-
-And we get:
-
-```text
-The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|>
-```
-
-Although this was a simple demo with dummy tools and a single call, the same technique works with 
-multiple real tools and longer conversations. This can be a powerful way to extend the capabilities of conversational
-agents with real-time information, computational tools like calculators, or access to large databases.
-
-### Understanding tool schemas
-
-Each function you pass to the `tools` argument of `apply_chat_template` is converted into a 
-[JSON schema](https://json-schema.org/learn/getting-started-step-by-step). These schemas
-are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they
-never see the actual code inside them. What they care about is the function **definitions** and the **arguments** they
-need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you
-to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and
-return the response in the chat.
-
-Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions
-follow the specification above, but if you encounter problems, or you simply want more control over the conversion, 
-you can handle the conversion manually. Here is an example of a manual schema conversion.
-
-```python
-from transformers.utils import get_json_schema
-
-def multiply(a: float, b: float):
-    """
-    A function that multiplies two numbers
-    
-    Args:
-        a: The first number to multiply
-        b: The second number to multiply
-    """
-    return a * b
-
-schema = get_json_schema(multiply)
-print(schema)
-```
-
-This will yield:
-
-```json
-{
-  "type": "function", 
-  "function": {
-    "name": "multiply", 
-    "description": "A function that multiplies two numbers", 
-    "parameters": {
-      "type": "object", 
-      "properties": {
-        "a": {
-          "type": "number", 
-          "description": "The first number to multiply"
-        }, 
-        "b": {
-          "type": "number",
-          "description": "The second number to multiply"
-        }
-      }, 
-      "required": ["a", "b"]
-    }
-  }
-}
-```
-
-If you wish, you can edit these schemas, or even write them from scratch yourself without using `get_json_schema` at 
-all. JSON schemas can be passed directly to the `tools` argument of 
-`apply_chat_template` - this gives you a lot of power to define precise schemas for more complex functions. Be careful,
-though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We 
-recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments) 
-to a minimum.
-
-Here is an example of defining schemas by hand, and passing them directly to `apply_chat_template`:
-
-```python
-# A simple function that takes no arguments
-current_time = {
-  "type": "function", 
-  "function": {
-    "name": "current_time",
-    "description": "Get the current local time as a string.",
-    "parameters": {
-      'type': 'object',
-      'properties': {}
-    }
-  }
-}
-
-# A more complete function that takes two numerical arguments
-multiply = {
-  'type': 'function',
-  'function': {
-    'name': 'multiply',
-    'description': 'A function that multiplies two numbers', 
-    'parameters': {
-      'type': 'object', 
-      'properties': {
-        'a': {
-          'type': 'number',
-          'description': 'The first number to multiply'
-        }, 
-        'b': {
-          'type': 'number', 'description': 'The second number to multiply'
-        }
-      }, 
-      'required': ['a', 'b']
-    }
-  }
-}
-
-model_input = tokenizer.apply_chat_template(
-    messages,
-    tools = [current_time, multiply]
-)
-```
-
-## Advanced: Retrieval-augmented generation
-
-"Retrieval-augmented generation" or "RAG" LLMs can search a corpus of documents for information before responding
-to a query. This allows models to vastly expand their knowledge base beyond their limited context size. Our 
-recommendation for RAG models is that their template
-should accept a `documents` argument. This should be a list of documents, where each "document"
-is a single dict with `title` and `contents` keys, both of which are strings. Because this format is much simpler
-than the JSON schemas used for tools, no helper functions are necessary.
-
-Here's an example of a RAG template in action:
-
-```python
-document1 = {
-    "title": "The Moon: Our Age-Old Foe",
-    "contents": "Man has always dreamed of destroying the moon. In this essay, I shall..."
-}
-
-document2 = {
-    "title": "The Sun: Our Age-Old Friend",
-    "contents": "Although often underappreciated, the sun provides several notable benefits..."
-}
-
-model_input = tokenizer.apply_chat_template(
-    messages,
-    documents=[document1, document2]
-)
-```
-
 ## Advanced: How do chat templates work?

 The chat template for a model is stored on the `tokenizer.chat_template` attribute. If no chat template is set, the
-default template for that model class is used instead. Let's take a look at a `Zephyr` chat template, though note this
-one is a little simplified from the actual one!
+default template for that model class is used instead. Let's take a look at the template for `BlenderBot`:
+
+```python
+
+>>> from transformers import AutoTokenizer
+>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
+
+>>> tokenizer.default_chat_template
+"{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}"
+```
+
+That's kind of intimidating. Let's add some newlines and indentation to make it more readable. Note that the first
+newline after each block as well as any preceding whitespace before a block are ignored by default, using the 
+Jinja `trim_blocks` and `lstrip_blocks` flags. However, be cautious - although leading whitespace on each
+line is stripped, spaces between blocks on the same line are not. We strongly recommend checking that your template
+isn't printing extra spaces where it shouldn't be!

 ```
-{%- for message in messages %}
-    {{- '<|' + message['role'] + '|>\n' }}
-    {{- message['content'] + eos_token }}
-{%- endfor %}
-{%- if add_generation_prompt %}
-    {{- '<|assistant|>\n' }}
-{%- endif %}
+{% for message in messages %}
+    {% if message['role'] == 'user' %}
+        {{ ' ' }}
+    {% endif %}
+    {{ message['content'] }}
+    {% if not loop.last %}
+        {{ '  ' }}
+    {% endif %}
+{% endfor %}
+{{ eos_token }}
 ```

 If you've never seen one of these before, this is a [Jinja template](https://jinja.palletsprojects.com/en/3.1.x/templates/).
@ -653,38 +271,40 @@ Jinja is a templating language that allows you to write simple code that generat
 syntax resembles Python. In pure Python, this template would look something like this:

 ```python
-for message in messages:
-    print(f'<|{message["role"]}|>')
-    print(message['content'] + eos_token)
-if add_generation_prompt:
-    print('<|assistant|>')
+for idx, message in enumerate(messages):
+    if message['role'] == 'user':
+        print(' ')
+    print(message['content'])
+    if not idx == len(messages) - 1:  # Check for the last message in the conversation
+        print('  ')
+print(eos_token)
 ```

 Effectively, the template does three things:
-1. For each message, print the role enclosed in `<|` and `|>`, like `<|user|>` or `<|assistant|>`.
-2. Next, print the content of the message, followed by the end-of-sequence token.
-3. Finally, if `add_generation_prompt` is set, print the assistant token, so that the model knows to start generating
-   an assistant response.
+1. For each message, if the message is a user message, add a blank space before it, otherwise print nothing.
+2. Add the message content.
+3. If the message is not the last message, add two spaces after it. After the final message, print the EOS token.

-This is a pretty simple template but Jinja gives you a lot of flexibility to do more complex things! Let's see a Jinja
-template that can format inputs similarly to the way LLaMA formats them (note that the real LLaMA template includes 
-handling for default system messages and slightly different system message handling in general - don't use this one 
-in your actual code!)
+This is a pretty simple template - it doesn't add any control tokens, and it doesn't support "system" messages, which 
+are a common way to give the model directives about how it should behave in the subsequent conversation.
+But Jinja gives you a lot of flexibility to do those things! Let's see a Jinja template that can format inputs
+similarly to the way LLaMA formats them (note that the real LLaMA template includes handling for default system
+messages and slightly different system message handling in general - don't use this one in your actual code!)

 ```
-{%- for message in messages %}
-    {%- if message['role'] == 'user' %}
-        {{- bos_token + '[INST] ' + message['content'] + ' [/INST]' }}
-    {%- elif message['role'] == 'system' %}
-        {{- '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}
-    {%- elif message['role'] == 'assistant' %}
-        {{- ' '  + message['content'] + ' ' + eos_token }}
-    {%- endif %}
-{%- endfor %}
+{% for message in messages %}
+    {% if message['role'] == 'user' %}
+        {{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}
+    {% elif message['role'] == 'system' %}
+        {{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}
+    {% elif message['role'] == 'assistant' %}
+        {{ ' '  + message['content'] + ' ' + eos_token }}
+    {% endif %}
+{% endfor %}
 ```

-Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens like
-`[INST]` and `[/INST]` based on the role of each message. User, assistant and system messages are clearly
+Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens based
+on the "role" of each message, which represents who sent it. User, assistant and system messages are clearly
 distinguishable to the model because of the tokens they're wrapped in.

 ## Advanced: Adding and editing chat templates
@ -696,15 +316,15 @@ existing template from another model and simply edit it for your needs! For exam
 above and add "[ASST]" and "[/ASST]" to assistant messages:

 ```
-{%- for message in messages %}
-    {%- if message['role'] == 'user' %}
-        {{- bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}
-    {%- elif message['role'] == 'system' %}
-        {{- '<<SYS>>\\n' + message['content'].strip() + '\\n<</SYS>>\\n\\n' }}
-    {%- elif message['role'] == 'assistant' %}
-        {{- '[ASST] '  + message['content'] + ' [/ASST]' + eos_token }}
-    {%- endif %}
-{%- endfor %}
+{% for message in messages %}
+    {% if message['role'] == 'user' %}
+        {{ bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}
+    {% elif message['role'] == 'system' %}
+        {{ '<<SYS>>\\n' + message['content'].strip() + '\\n<</SYS>>\\n\\n' }}
+    {% elif message['role'] == 'assistant' %}
+        {{ '[ASST] '  + message['content'] + ' [/ASST]' + eos_token }}
+    {% endif %}
+{% endfor %}
 ```

 Now, simply set the `tokenizer.chat_template` attribute. Next time you use [`~PreTrainedTokenizer.apply_chat_template`], it will
@ -731,23 +351,22 @@ template. This will ensure that text generation tools can correctly figure out w
 </Tip>


-### Why do some models have multiple templates?
+### What are "default" templates?

-Some models use different templates for different use cases. For example, they might use one template for normal chat
-and another for tool-use, or retrieval-augmented generation. In these cases, `tokenizer.chat_template` is a dictionary.
-This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use
-Jinja statements like `if tools is defined` and `{% macro %}` definitions to easily wrap multiple code paths in a
-single template.
+Before the introduction of chat templates, chat handling was hardcoded at the model class level. For backwards 
+compatibility, we have retained this class-specific handling as default templates, also set at the class level. If a
+model does not have a chat template set, but there is a default template for its model class, the `TextGenerationPipeline`
+class and methods like `apply_chat_template` will use the class template instead. You can find out what the default
+template for your tokenizer is by checking the `tokenizer.default_chat_template` attribute.

-When a tokenizer has multiple templates, `tokenizer.chat_template` will be a `dict`, where each key is the name
-of a template. The `apply_chat_template` method has special handling for certain template names: Specifically, it will
-look for a template named `default` in most cases, and will raise an error if it can't find one. However, if a template
-named `tool_use` exists when the user has passed a `tools` argument, it will use that instead. To access templates
-with other names, pass the name of the template you want to the `chat_template` argument of
-`apply_chat_template()`.
+This is something we do purely for backward compatibility reasons, to avoid breaking any existing workflows. Even when
+the class template is appropriate for your model, we strongly recommend overriding the default template by
+setting the `chat_template` attribute explicitly to make it clear to users that your model has been correctly configured
+for chat.

-We find that this can be a bit confusing for users, though - so if you're writing a template yourself, we recommend
-trying to put it all in a single template where possible!
+Now that actual chat templates have been adopted more widely, default templates have been deprecated and will be
+removed in a future release. We strongly recommend setting the `chat_template` attribute for any tokenizers that
+still depend on them!

 ### What template should I use?

@ -763,9 +382,9 @@ input formats. One popular choice is the `ChatML` format, and this is a good, fl
 It looks like this:

 ```
-{%- for message in messages %}
-    {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}
-{%- endfor %}
+{% for message in messages %}
+    {{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}
+{% endfor %}
 ```

 If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes
@ -810,113 +429,60 @@ it's time to put an end to them!

 ## Advanced: Template writing tips

-<Tip>
+If you're unfamiliar with Jinja, we generally find that the easiest way to write a chat template is to first
+write a short Python script that formats messages the way you want, and then convert that script into a template.

-The easiest way to get started with writing Jinja templates is to take a look at some existing ones. You can use
-`print(tokenizer.chat_template)` for any chat model to see what template it's using. In general, models that support tool use have 
-much more complex templates than other models - so when you're just getting started, they're probably a bad example
-to learn from! You can also take a look at the 
-[Jinja documentation](https://jinja.palletsprojects.com/en/3.1.x/templates/#synopsis) for details
-of general Jinja formatting and syntax.
+Remember that the template handler will receive the conversation history as a variable called `messages`. Each
+message is a dictionary with two keys, `role` and `content`. You will be able to access `messages` in your template
+just like you can in Python, which means you can loop over it with `{% for message in messages %}` or access
+individual messages with, for example, `{{ messages[0] }}`.

-</Tip>
+You can also use the following tips to convert your code to Jinja:

-Jinja templates in `transformers` are identical to Jinja templates elsewhere. The main thing to know is that 
-the conversation history will be accessible inside your template as a variable called `messages`.  
-You will be able to access `messages` in your template just like you can in Python, which means you can loop over 
-it with `{% for message in messages %}` or access individual messages with `{{ messages[0] }}`, for example.
+### For loops

-You can also use the following tips to write clean, efficient Jinja templates:
-
-### Trimming whitespace
-
-By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat
-templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing
-your templates like this:
-
-```
-{%- for message in messages %}
-    {{- message['role'] + message['content'] }}
-{%- endfor %}
-```
-
-rather than like this:
+For loops in Jinja look like this:

 ```
 {% for message in messages %}
-    {{ message['role'] + message['content'] }}
+{{ message['content'] }}
 {% endfor %}
 ```

-Adding `-` will strip any whitespace that comes before the block. The second example looks innocent, but the newline
-and indentation may end up being included in the output, which is probably not what you want!
+Note that whatever's inside the {{ expression block }} will be printed to the output. You can use operators like
+`+` to combine strings inside expression blocks.
+
+### If statements
+
+If statements in Jinja look like this:
+
+```
+{% if message['role'] == 'user' %}
+{{ message['content'] }}
+{% endif %}
+```
+
+Note that where Python uses whitespace to mark the beginnings and ends of `for` and `if` blocks, Jinja requires you
+to explicitly end them with `{% endfor %}` and `{% endif %}`.

 ### Special variables

-Inside your template, you will have access to several special variables. The most important of these is `messages`, 
-which contains the chat history as a list of message dicts. However, there are several others. Not every
-variable will be used in every template. The most common other variables are:
+Inside your template, you will have access to the list of `messages`, but you can also access several other special
+variables. These include special tokens like `bos_token` and `eos_token`, as well as the `add_generation_prompt`
+variable that we discussed above. You can also use the `loop` variable to access information about the current loop
+iteration, for example using `{% if loop.last %}` to check if the current message is the last message in the 
+conversation. Here's an example that puts these ideas together to add a generation prompt at the end of the
+conversation if `add_generation_prompt` is `True`:

- `tools` contains a list of tools in JSON schema format. Will be `None` or undefined if no tools are passed.
- `documents` contains a list of documents in the format `{"title": "Title", "contents": "Contents"}`, used for retrieval-augmented generation. Will be `None` or undefined if no documents are passed.
- `add_generation_prompt` is a bool that is `True` if the user has requested a generation prompt, and `False` otherwise. If this is set, your template should add the header for an assistant message to the end of the conversation. If your model doesn't have a specific header for assistant messages, you can ignore this flag.
- **Special tokens** like `bos_token` and `eos_token`. These are extracted from `tokenizer.special_tokens_map`. The exact tokens available inside each template will differ depending on the parent tokenizer.
-
-<Tip>
-
-You can actually pass any `kwarg` to `apply_chat_template`, and it will be accessible inside the template as a variable. In general,
-we recommend trying to stick to the core variables above, as it will make your model harder to use if users have
-to write custom code to pass model-specific `kwargs`. However, we're aware that this field moves quickly, so if you
-have a new use-case that doesn't fit in the core API, feel free to use a new `kwarg` for it! If a new `kwarg`
-becomes common we may promote it into the core API and create a standard, documented format for it.
-
-</Tip>
-
-### Callable functions
-
-There is also a short list of callable functions available to you inside your templates. These are:
-
- `raise_exception(msg)`: Raises a `TemplateException`. This is useful for debugging, and for telling users when they're
-doing something that your template doesn't support.
- `strftime_now(format_str)`: Equivalent to `datetime.now().strftime(format_str)` in Python. This is used for getting
-the current date/time in a specific format, which is sometimes included in system messages.
-
-### Compatibility with non-Python Jinja
-
-There are multiple implementations of Jinja in various languages. They generally have the same syntax,
-but a key difference is that when you're writing a template in Python you can use Python methods, such as
-`.lower()` on strings or `.items()` on dicts. This will break if someone tries to use your template on a non-Python
-implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS
-and Rust are very popular. 
-
-Don't panic, though! There are a few easy changes you can make to your templates to ensure they're compatible across
-all implementations of Jinja:
-
- Replace Python methods with Jinja filters. These usually have the same name, for example `string.lower()` becomes
-  `string|lower`, and `dict.items()` becomes `dict|items`. One notable change is that `string.strip()` becomes `string|trim`.
-  See the [list of built-in filters](https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters)
-  in the Jinja documentation for more.
- Replace `True`, `False` and `None`, which are Python-specific, with `true`, `false` and `none`.
- Directly rendering a dict or list may give different results in other implementations (for example, string entries
-  might change from single-quoted to double-quoted). Adding the `tojson` filter can help to ensure consistency here.
-
-### Writing and debugging larger templates
-
-When this feature was introduced, most templates were quite small, the Jinja equivalent of a "one-liner" script. 
-However, with new models and features like tool-use and RAG, some templates can be 100 lines long or more. When
-writing templates like these, it's a good idea to write them in a separate file, using a text editor. You can easily 
-extract a chat template to a file:
-
-```python
-open("template.jinja", "w").write(tokenizer.chat_template)
+```
+{% if loop.last and add_generation_prompt %}
+{{ bos_token + 'Assistant:\n' }}
+{% endif %}
 ```

-Or load the edited template back into the tokenizer:
+### Notes on whitespace

-```python
-tokenizer.chat_template = open("template.jinja").read()
-```
-
-As an added bonus, when you write a long, multi-line template in a separate file, line numbers in that file will
-exactly correspond to line numbers in template parsing or execution errors. This will make it much easier to
-identify the source of issues.
+As much as possible, we've tried to get Jinja to ignore whitespace outside of {{ expressions }}. However, be aware
+that Jinja is a general-purpose templating engine, and it may treat whitespace between blocks on the same line
+as significant and print it to the output. We **strongly** recommend checking that your template isn't printing extra
+spaces where it shouldn't be before you upload it!
--- a/docs/source/en/community.md
+++ b/docs/source/en/community.md
@ -63,8 +63,7 @@ This page regroups resources around 🤗 Transformers developed by the community
 | [Evaluate LUKE on TACRED, a relation extraction dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) | How to evaluate *LukeForEntityPairClassification* on the TACRED dataset | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) |
 | [Evaluate LUKE on CoNLL-2003, an important NER benchmark](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) | How to evaluate *LukeForEntitySpanClassification* on the CoNLL-2003 dataset | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) |
 | [Evaluate BigBird-Pegasus on PubMed dataset](https://github.com/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) | How to evaluate *BigBirdPegasusForConditionalGeneration* on PubMed dataset | [Vasudev Gupta](https://github.com/vasudevgupta7) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) |
-| [Speech Emotion Classification with Wav2Vec2](https://github.com/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) | How to leverage a pretrained Wav2Vec2 model for Emotion Classification on the MEGA dataset | [Mehrdad Farahani](https://github.com/m3hrdadfi) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) |
+| [Speech Emotion Classification with Wav2Vec2](https://github.com/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) | How to leverage a pretrained Wav2Vec2 model for Emotion Classification on the MEGA dataset | [Mehrdad Farahani](https://github.com/m3hrdadfi) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) |
 | [Detect objects in an image with DETR](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) | How to use a trained *DetrForObjectDetection* model to detect objects in an image and visualize attention | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) |
 | [Fine-tune DETR on a custom object detection dataset](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) | How to fine-tune *DetrForObjectDetection* on a custom object detection dataset | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) |
 | [Finetune T5 for Named Entity Recognition](https://github.com/ToluClassics/Notebooks/blob/main/T5_Ner_Finetuning.ipynb) | How to fine-tune *T5* on a Named Entity Recognition Task | [Ogundepo Odunayo](https://github.com/ToluClassics) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1obr78FY_cBmWY5ODViCmzdY6O1KB65Vc?usp=sharing) |
-| [Fine-Tuning Open-Source LLM using QLoRA with MLflow and PEFT](https://github.com/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) | How to use [QLoRA](https://github.com/artidoro/qlora) and [PEFT](https://huggingface.co/docs/peft/en/index) to fine-tune an LLM in a memory-efficient way, while using [MLflow](https://mlflow.org/docs/latest/llms/transformers/index.html) to manage experiment tracking | [Yuki Watanabe](https://github.com/B-Step62) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) |
--- a/docs/source/en/conversations.md
+++ b/docs/source/en/conversations.md
@ -1,290 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Chatting with Transformers
-
-If you're reading this article, you're almost certainly aware of **chat models**. Chat models are conversational
-AIs that you can send and receive messages with. The most famous of these is the proprietary ChatGPT, but there are
-now many open-source chat models which match or even substantially exceed its performance. These models are free to
-download and run on a local machine. Although the largest and most capable models require high-powered hardware
-and lots of memory to run, there are smaller models that will run perfectly well on a single consumer GPU, or even
-an ordinary desktop or notebook CPU. 
-
-This guide will help you get started with chat models. We'll start with a brief quickstart guide that uses a convenient,
-high-level "pipeline". This is all you need if you just want to start running a chat model 
-immediately. After the quickstart, we'll move on to more detailed information about
-what exactly chat models are, how to choose an appropriate one, and a low-level breakdown of each of the
-steps involved in talking to a chat model. We'll also give some tips on optimizing the performance and memory usage
-of your chat models.
-
-
-## Quickstart
-
-If you have no time for details, here's the brief summary: Chat models continue chats. This means that you pass them
-a conversation history, which can be as short as a single user message, and the model will continue the conversation
-by adding its response. Let's see this in action. First, let's build a chat:
-
-```python
-chat = [
-    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
-    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
-]
-```
-
-Notice that in addition to the user's message, we added a **system** message at the start of the conversation. Not all
-chat models support system messages, but when they do, they represent high-level directives about how the model
-should behave in the conversation. You can use this to guide the model - whether you want short or long responses,
-lighthearted or serious ones, and so on. If you want the model to do useful work instead of
-practicing its improv routine, you can either omit the system message or try a terse one such as "You are a helpful and intelligent
-AI assistant who responds to user queries."
-
-Once you have a chat, the quickest way to continue it is using the [`TextGenerationPipeline`]. 
-Let's see this in action with `LLaMA-3`. Note that `LLaMA-3` is a gated model, which means you will need to 
-[apply for access](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and log in with your Hugging Face 
-account to use it. We'll also use `device_map="auto"`, which will load the model on GPU if there's enough memory
-for it, and set the dtype to `torch.bfloat16` to save memory:
-
-```python
-import torch
-from transformers import pipeline
-
-pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
-response = pipe(chat, max_new_tokens=512)
-print(response[0]['generated_text'][-1]['content'])
-```
-
-And you'll get:
-
-```text
-(sigh) Oh boy, you're asking me for advice? You're gonna need a map, pal! Alright, 
-alright, I'll give you the lowdown. But don't say I didn't warn you, I'm a robot, not a tour guide!
-
-So, you wanna know what's fun to do in the Big Apple? Well, let me tell you, there's a million 
-things to do, but I'll give you the highlights. First off, you gotta see the sights: the Statue of 
-Liberty, Central Park, Times Square... you know, the usual tourist traps. But if you're lookin' for 
-something a little more... unusual, I'd recommend checkin' out the Museum of Modern Art. It's got 
-some wild stuff, like that Warhol guy's soup cans and all that jazz.
-
-And if you're feelin' adventurous, take a walk across the Brooklyn Bridge. Just watch out for 
-those pesky pigeons, they're like little feathered thieves! (laughs) Get it? Thieves? Ah, never mind.
-
-Now, if you're lookin' for some serious fun, hit up the comedy clubs in Greenwich Village. You might 
-even catch a glimpse of some up-and-coming comedians... or a bunch of wannabes tryin' to make it big. (winks)
-
-And finally, if you're feelin' like a real New Yorker, grab a slice of pizza from one of the many amazing
-pizzerias around the city. Just don't try to order a "robot-sized" slice, trust me, it won't end well. (laughs)
-
-So, there you have it, pal! That's my expert advice on what to do in New York. Now, if you'll
-excuse me, I've got some oil changes to attend to. (winks)
-```
-
-You can continue the chat by appending your own response to it. The
-`response` object returned by the pipeline actually contains the entire chat so far, so we can simply append
-a message and pass it back:
-
-```python
-chat = response[0]['generated_text']
-chat.append(
-    {"role": "user", "content": "Wait, what's so wild about soup cans?"}
-)
-response = pipe(chat, max_new_tokens=512)
-print(response[0]['generated_text'][-1]['content'])
-```
-
-And you'll get:
-
-```text
-(laughs) Oh, you're killin' me, pal! You don't get it, do you? Warhol's soup cans are like, art, man! 
-It's like, he took something totally mundane, like a can of soup, and turned it into a masterpiece. It's 
-like, "Hey, look at me, I'm a can of soup, but I'm also a work of art!" 
-(sarcastically) Oh, yeah, real original, Andy.
-
-But, you know, back in the '60s, it was like, a big deal. People were all about challenging the
-status quo, and Warhol was like, the king of that. He took the ordinary and made it extraordinary.
-And, let me tell you, it was like, a real game-changer. I mean, who would've thought that a can of soup could be art? (laughs)
-
-But, hey, you're not alone, pal. I mean, I'm a robot, and even I don't get it. (winks)
-But, hey, that's what makes art, art, right? (laughs)
-```
-
-The remainder of this tutorial will cover specific topics such
-as performance and memory, or how to select a chat model for your needs.
-
-## Choosing a chat model
-
-There are an enormous number of different chat models available on the [Hugging Face Hub](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending),
-and new users often feel very overwhelmed by the selection offered. Don't be, though! You really need to just focus on
-two important considerations: 
- The model's size, which will determine if you can fit it in memory and how quickly it will
-run.
- The quality of the model's chat output.
-
-In general, these are correlated - bigger models tend to be 
-more capable, but even so there's a lot of variation at a given size point!
-
-### Size and model naming
-The size of a model is easy to spot - it's the number in the model name, like "8B" or "70B". This is the number of
-**parameters** in the model. Without quantization, you should expect to need about 2 bytes of memory per parameter.
-This means that an "8B" model with 8 billion parameters will need about 16GB of memory just to fit the parameters, 
-plus a little extra for other overhead. It's a good fit for a high-end consumer GPU with 24GB of memory, such as a 3090
-or 4090.
-
-Some chat models are "Mixture of Experts" models. These may list their sizes in different ways, such as "8x7B" or 
-"141B-A35B". The numbers are a little fuzzier here, but in general you can read this as saying that the model
-has approximately 56 (8x7) billion parameters in the first case, or 141 billion parameters in the second case.
-
-Note that it is very common to use quantization techniques to reduce the memory usage per parameter to 8 bits, 4 bits,
-or even less. This topic is discussed in more detail in the [Memory considerations](#memory-considerations) section below.
-
-### But which chat model is best?
-Even once you know the size of chat model you can run, there's still a lot of choice out there. One way to sift through
-it all is to consult **leaderboards**. Two of the most popular leaderboards are the [OpenLLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
-and the [LMSys Chatbot Arena Leaderboard](https://chat.lmsys.org/?leaderboard). Note that the LMSys leaderboard
-also includes proprietary models - look at the `licence` column to identify open-source ones that you can download, then
-search for them on the [Hugging Face Hub](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending).
-
-### Specialist domains
-Some models may be specialized for certain domains, such as medical or legal text, or non-English languages. 
-If you're working in these domains, you may find that a specialized model will give you big performance benefits. 
-Don't automatically assume that, though! Particularly when specialized models are smaller or older than the current 
-cutting-edge, a top-end general-purpose model may still outclass them. Thankfully, we are beginning to see 
-[domain-specific leaderboards](https://huggingface.co/blog/leaderboard-medicalllm) that should make it easier to locate
-the best models for specialized domains.
-
-## What happens inside the pipeline?
-
-The quickstart above used a high-level pipeline to chat with a chat model, which is convenient, but not the
-most flexible. Let's take a more low-level approach, to see each of the steps involved in chat. Let's start with
-a code sample, and then break it down:
-
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-
-# Prepare the input as before
-chat = [
-    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
-    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
-]
-
-# 1: Load the model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
-
-# 2: Apply the chat template
-formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-print("Formatted chat:\n", formatted_chat)
-
-# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
-inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
-# Move the tokenized inputs to the same device the model is on (GPU/CPU)
-inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
-print("Tokenized inputs:\n", inputs)
-
-# 4: Generate text from the model
-outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)
-print("Generated tokens:\n", outputs)
-
-# 5: Decode the output back to a string
-decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
-print("Decoded output:\n", decoded_output)
-```
-
-There's a lot in here, each piece of which could be its own document! Rather than going into too much detail, I'll cover
-the broad ideas, and leave the details for the linked documents. The key steps are:
-
-1. [Models](https://huggingface.co/learn/nlp-course/en/chapter2/3) and [Tokenizers](https://huggingface.co/learn/nlp-course/en/chapter2/4?fw=pt) are loaded from the Hugging Face Hub.
-2. The chat is formatted using the tokenizer's [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating)
-3. The formatted chat is [tokenized](https://huggingface.co/learn/nlp-course/en/chapter2/4) using the tokenizer.
-4. We [generate](https://huggingface.co/docs/transformers/en/llm_tutorial) a response from the model.
-5. The tokens output by the model are decoded back to a string
-
-## Performance, memory and hardware
-
-You probably know by now that most machine learning tasks are run on GPUs. However, it is entirely possible
-to generate text from a chat model or language model on a CPU, albeit somewhat more slowly. If you can fit
-the model in GPU memory, though, this will usually be the preferable option.
-
-### Memory considerations
-
-By default, Hugging Face classes like [`TextGenerationPipeline`] or [`AutoModelForCausalLM`] will load the model in 
-`float32` precision. This means that it will need 4 bytes (32 bits) per parameter, so an "8B" model with 8 billion
-parameters will need ~32GB of memory. However, this can be wasteful! Most modern language models are trained in 
-"bfloat16" precision, which uses only 2 bytes per parameter. If your hardware supports it (Nvidia 30xx/Axxx
-or newer), you can load the model in `bfloat16` precision, using the `torch_dtype` argument as we did above.
-
-It is possible to go even lower than 16-bits using "quantization", a method to lossily compress model weights. This
-allows each parameter to be squeezed down to 8 bits, 4 bits or even less. Note that, especially at 4 bits,
-the model's outputs may be negatively affected, but often this is a tradeoff worth making to fit a larger and more
-capable chat model in memory. Let's see this in action with `bitsandbytes`:
-
-```python
-from transformers import AutoModelForCausalLM, BitsAndBytesConfig
-
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # You can also try load_in_4bit
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", quantization_config=quantization_config)
-```
-
-Or we can do the same thing using the `pipeline` API:
-
-```python
-from transformers import pipeline, BitsAndBytesConfig
-
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # You can also try load_in_4bit
-pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", model_kwargs={"quantization_config": quantization_config})
-```
-
-There are several other options for quantizing models besides `bitsandbytes` - please see the [Quantization guide](./quantization)
-for more information.
-
-### Performance considerations
-
-<Tip>
-
-For a more extensive guide on language model performance and optimization, check out [LLM Inference Optimization](./llm_optims) .
-
-</Tip>
-
-
-As a general rule, larger chat models will be slower in addition to requiring more memory. It's possible to be
-more concrete about this, though: Generating text from a chat model is unusual in that it is bottlenecked by
-**memory bandwidth** rather than compute power, because every active parameter must be read from memory for each
-token that the model generates. This means that number of tokens per second you can generate from a chat
-model is generally proportional to the total bandwidth of the memory it resides in, divided by the size of the model.
-
-In our quickstart example above, our model was ~16GB in size when loaded in `bfloat16` precision. 
-This means that 16GB must be read from memory for every token generated by the model. Total memory bandwidth can
-vary from 20-100GB/sec for consumer CPUs to 200-900GB/sec for consumer GPUs, specialized CPUs like
-Intel Xeon, AMD Threadripper/Epyc or high-end Apple silicon, and finally up to 2-3TB/sec for data center GPUs like
-the Nvidia A100 or H100. This should give you a good idea of the generation speed you can expect from these different
-hardware types.
-
-Therefore, if you want to improve the speed of text generation, the easiest solution is to either reduce the
-size of the model in memory (usually by quantization), or get hardware with higher memory bandwidth. For advanced users, 
-several other techniques exist to get around this bandwidth bottleneck. The most common are variants on 
-[assisted generation](https://huggingface.co/blog/assisted-generation), also known as "speculative
-sampling". These techniques try to guess multiple future tokens at once, often using a smaller "draft model", and then
-confirm these generations with the chat model. If the guesses are validated by the chat model, more than one token can
-be generated per forward pass, which greatly alleviates the bandwidth bottleneck and improves generation speed.  
-
-Finally, we should also note the impact of "Mixture of Experts" (MoE) models here. Several popular chat models,
-such as Mixtral, Qwen-MoE and DBRX, are MoE models. In these models, not every parameter is active for every token generated.
-As a result, MoE models generally have much lower memory bandwidth requirements, even though their total size
-can be quite large. They can therefore be several times faster than a normal "dense" model of the same size. However,
-techniques like assisted generation are generally ineffective for these models because more parameters will become
-active with each new speculated token, which will negate the bandwidth and speed benefits that the MoE architecture
-provides.
-
--- a/docs/source/en/create_a_model.md
+++ b/docs/source/en/create_a_model.md
@ -327,21 +327,31 @@ For example, to load a [ResNet](../model_doc/resnet) backbone into a [MaskFormer
 Set `use_pretrained_backbone=True` to load pretrained ResNet weights for the backbone.

 ```py
-from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation
+from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, ResNetConfig

-config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=True) # backbone and neck config
+config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=True) # backbone and neck config
 model = MaskFormerForInstanceSegmentation(config) # head
 ```

+You could also load the backbone config separately and then pass it to the model config.
+
+```py
+from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, ResNetConfig
+
+backbone_config = ResNetConfig.from_pretrained("microsoft/resnet-50")
+config = MaskFormerConfig(backbone_config=backbone_config)
+model = MaskFormerForInstanceSegmentation(config)
+```
+
 </hfoption>
 <hfoption id="random weights">

 Set `use_pretrained_backbone=False` to randomly initialize a ResNet backbone.

 ```py
-from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation
+from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, ResNetConfig

-config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=False) # backbone and neck config
+config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=False) # backbone and neck config
 model = MaskFormerForInstanceSegmentation(config) # head
 ```

@ -356,43 +366,15 @@ model = MaskFormerForInstanceSegmentation(config)
 ```

 </hfoption>
-</hfoptions id="timm backbone">
+</hfoptions>

-[timm](https://hf.co/docs/timm/index) models are loaded within a model with `use_timm_backbone=True` or with [`TimmBackbone`] and [`TimmBackboneConfig`].
-
-Use `use_timm_backbone=True` and `use_pretrained_backbone=True` to load pretrained timm weights for the backbone.
-
-```python
-from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation
-
-config = MaskFormerConfig(backbone="resnet50", use_pretrained_backbone=True, use_timm_backbone=True) # backbone and neck config
-model = MaskFormerForInstanceSegmentation(config) # head
-```
-
-Set `use_timm_backbone=True` and `use_pretrained_backbone=False` to load a randomly initialized timm backbone.
-
-```python
-from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation
-
-config = MaskFormerConfig(backbone="resnet50", use_pretrained_backbone=False, use_timm_backbone=True) # backbone and neck config
-model = MaskFormerForInstanceSegmentation(config) # head
-```
-
-You could also load the backbone config and use it to create a `TimmBackbone` or pass it to the model config. Timm backbones will load pretrained weights by default. Set `use_pretrained_backbone=False` to load randomly initialized weights.
+[timm](https://hf.co/docs/timm/index) models are loaded with [`TimmBackbone`] and [`TimmBackboneConfig`].

 ```python
 from transformers import TimmBackboneConfig, TimmBackbone

-backbone_config = TimmBackboneConfig("resnet50", use_pretrained_backbone=False)
-
-# Create a backbone class
-backbone = TimmBackbone(config=backbone_config)
-
-# Create a model with a timm backbone
-from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation
-
-config = MaskFormerConfig(backbone_config=backbone_config)
-model = MaskFormerForInstanceSegmentation(config)
+backbone_config = TimmBackboneConfig("resnet50")
+model = TimmBackbone(config=backbone_config)
 ```

 ## Feature extractor
--- a/docs/source/en/custom_models.md
+++ b/docs/source/en/custom_models.md
@ -185,7 +185,7 @@ class ResnetModelForImageClassification(PreTrainedModel):
    def forward(self, tensor, labels=None):
        logits = self.model(tensor)
        if labels is not None:
-            loss = torch.nn.functional.cross_entropy(logits, labels)
+            loss = torch.nn.functional.cross_entropy(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}
 ```
--- a/docs/source/en/custom_tools.md
+++ b/docs/source/en/custom_tools.md
@ -0,0 +1,798 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Custom Tools and Prompts
+
+<Tip>
+
+If you are not aware of what tools and agents are in the context of transformers, we recommend you read the
+[Transformers Agents](transformers_agents) page first.
+
+</Tip>
+
+<Tip warning={true}>
+
+Transformers Agents is an experimental API that is subject to change at any time. Results returned by the agents
+can vary as the APIs or underlying models are prone to change.
+
+</Tip>
+
+Creating and using custom tools and prompts is paramount to empowering the agent and having it perform new tasks.
+In this guide we'll take a look at:
+
+- How to customize the prompt
+- How to use custom tools
+- How to create custom tools
+
+## Customizing the prompt
+
+As explained in [Transformers Agents](transformers_agents) agents can run in [`~Agent.run`] and [`~Agent.chat`] mode.
+Both the `run` and `chat` modes share the same underlying logic. The language model powering the agent is conditioned on a long
+prompt and completes the prompt by generating the next tokens until the stop token is reached.
+The only difference between the two modes is that during the `chat` mode the prompt is extended with 
+previous user inputs and model generations. This allows the agent to have access to past interactions,
+seemingly giving the agent some kind of memory.
+
+### Structure of the prompt
+
+Let's take a closer look at how the prompt is structured to understand how it can be best customized.
+The prompt is structured broadly into four parts.
+
+1. Introduction: how the agent should behave, explanation of the concept of tools.
+2. Description of all the tools. This is defined by a `<<all_tools>>` token that is dynamically replaced at runtime with the tools defined/chosen by the user.
+3. A set of examples of tasks and their solution
+4. Current example, and request for solution.
+
+To better understand each part, let's look at a shortened version of what the `run` prompt can look like:
+
+````text
+I will ask you to perform a task, your job is to come up with a series of simple commands in Python that will perform the task.
+[...]
+You can print intermediate results if it makes sense to do so.
+
+Tools:
+- document_qa: This is a tool that answers a question about a document (pdf). It takes an input named `document` which should be the document containing the information, as well as a `question` that is the question about the document. It returns a text that contains the answer to the question.
+- image_captioner: This is a tool that generates a description of an image. It takes an input named `image` which should be the image to the caption and returns a text that contains the description in English.
+[...]
+
+Task: "Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French."
+
+I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image.
+
+Answer:
+```py
+translated_question = translator(question=question, src_lang="French", tgt_lang="English")
+print(f"The translated question is {translated_question}.")
+answer = image_qa(image=image, question=translated_question)
+print(f"The answer is {answer}")
+```
+
+Task: "Identify the oldest person in the `document` and create an image showcasing the result as a banner."
+
+I will use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
+
+Answer:
+```py
+answer = document_qa(document, question="What is the oldest person?")
+print(f"The answer is {answer}.")
+image = image_generator("A banner showing " + answer)
+```
+
+[...]
+
+Task: "Draw me a picture of rivers and lakes"
+
+I will use the following
+````
+
+The introduction (the text before *"Tools:"*) explains precisely how the model shall behave and what it should do.
+This part most likely does not need to be customized as the agent shall always behave the same way.
+
+The second part (the bullet points below *"Tools"*) is dynamically added upon calling `run` or `chat`. There are 
+exactly as many bullet points as there are tools in `agent.toolbox` and each bullet point consists of the name 
+and description of the tool:
+
+```text
+- <tool.name>: <tool.description>
+```
+
+Let's verify this quickly by loading the document_qa tool and printing out the name and description.
+
+```py
+from transformers import load_tool
+
+document_qa = load_tool("document-question-answering")
+print(f"- {document_qa.name}: {document_qa.description}")
+```
+
+which gives:
+```text
+- document_qa: This is a tool that answers a question about a document (pdf). It takes an input named `document` which should be the document containing the information, as well as a `question` that is the question about the document. It returns a text that contains the answer to the question.
+```
+
+We can see that the tool name is short and precise. The description includes two parts, the first explaining
+what the tool does and the second stating what input arguments and return values are expected.
+
+A good tool name and tool description are very important for the agent to correctly use it. Note that the only
+information the agent has about the tool is its name and description, so one should make sure that both 
+are precisely written and match the style of the existing tools in the toolbox. In particular make sure the description
+mentions all the arguments expected by name in code-style, along with the expected type and a description of what they
+are.
+
+<Tip>
+
+Check the naming and description of the curated Transformers tools to better understand what name and 
+description a tool is expected to have. You can see all tools with the [`Agent.toolbox`] property.
+
+</Tip>
+
+The third part includes a set of curated examples that show the agent exactly what code it should produce
+for what kind of user request. The large language models empowering the agent are extremely good at 
+recognizing patterns in a prompt and repeating the pattern with new data. Therefore, it is very important
+that the examples are written in a way that maximizes the likelihood of the agent generating correct,
+executable code in practice. 
+
+Let's have a look at one example:
+
+````text
+Task: "Identify the oldest person in the `document` and create an image showcasing the result as a banner."
+
+I will use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
+
+Answer:
+```py
+answer = document_qa(document, question="What is the oldest person?")
+print(f"The answer is {answer}.")
+image = image_generator("A banner showing " + answer)
+```
+
+````
+
+The pattern the model is prompted to repeat has three parts: The task statement, the agent's explanation of 
+what it intends to do, and finally the generated code. Every example that is part of the prompt has this exact 
+pattern, thus making sure that the agent will reproduce exactly the same pattern when generating new tokens.
+
+The prompt examples are curated by the Transformers team and rigorously evaluated on a set of 
+[problem statements](https://github.com/huggingface/transformers/blob/main/src/transformers/tools/evaluate_agent.py)
+to ensure that the agent's prompt is as good as possible to solve real use cases of the agent.
+
+The final part of the prompt corresponds to:
+```text
+Task: "Draw me a picture of rivers and lakes"
+
+I will use the following
+```
+
+This is a final, unfinished example that the agent is tasked to complete. The unfinished example
+is dynamically created based on the actual user input. For the above example, the user ran:
+
+```py
+agent.run("Draw me a picture of rivers and lakes")
+```
+
+The user input - *a.k.a* the task: *"Draw me a picture of rivers and lakes"* is cast into the 
+prompt template: "Task: <task> \n\n I will use the following". This sentence makes up the final lines of the 
+prompt the agent is conditioned on, therefore strongly influencing the agent to finish the example 
+exactly in the same way it was previously done in the examples.
+
+Without going into too much detail, the chat template has the same prompt structure with the 
+examples having a slightly different style, *e.g.*:
+
+````text
+[...]
+
+=====
+
+Human: Answer the question in the variable `question` about the image stored in the variable `image`.
+
+Assistant: I will use the tool `image_qa` to answer the question on the input image.
+
+```py
+answer = image_qa(text=question, image=image)
+print(f"The answer is {answer}")
+```
+
+Human: I tried this code, it worked but didn't give me a good result. The question is in French
+
+Assistant: In this case, the question needs to be translated first. I will use the tool `translator` to do this.
+
+```py
+translated_question = translator(question=question, src_lang="French", tgt_lang="English")
+print(f"The translated question is {translated_question}.")
+answer = image_qa(text=translated_question, image=image)
+print(f"The answer is {answer}")
+```
+
+=====
+
+[...]
+````
+
+Contrary to the examples of the `run` prompt, each `chat` prompt example has one or more exchanges between the
+*Human* and the *Assistant*. Every exchange is structured similarly to the example of the `run` prompt.
+The user's input is appended after *Human:* and the agent is prompted to first generate what needs to be done
+before generating code. An exchange can be based on previous exchanges, therefore allowing the user to refer
+to past exchanges, as is done above, *e.g.*, where the user's input "I tried **this** code" refers to the
+previously generated code of the agent.
+
+Upon running `.chat`, the user's input or *task* is cast into an unfinished example of the form:
+```text
+Human: <user-input>\n\nAssistant:
+```
+which the agent completes. Contrary to the `run` command, the `chat` command then appends the completed example
+to the prompt, thus giving the agent more context for the next `chat` turn.
+
+Great, now that we know how the prompt is structured, let's see how we can customize it!
+
+### Writing good user inputs
+
+While large language models are getting better and better at understanding users' intentions, it helps 
+enormously to be as precise as possible to help the agent pick the correct task. What does it mean to be 
+as precise as possible?
+
+The agent sees a list of tool names and their description in its prompt. The more tools are added the 
+more difficult it becomes for the agent to choose the correct tool and it's even more difficult to choose
+the correct sequences of tools to run. Let's look at a common failure case, here we will only return 
+the code to analyze it.
+
+```py
+from transformers import HfAgent
+
+agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder")
+
+agent.run("Show me a tree", return_code=True)
+```
+
+gives:
+
+```text
+==Explanation from the agent==
+I will use the following tool: `image_segmenter` to create a segmentation mask for the image.
+
+
+==Code generated by the agent==
+mask = image_segmenter(image, prompt="tree")
+```
+
+which is probably not what we wanted. Instead, it is more likely that we want an image of a tree to be generated.
+To steer the agent more towards using a specific tool it can therefore be very helpful to use important keywords that 
+are present in the tool's name and description. Let's have a look.
+```py
+agent.toolbox["image_generator"].description
+```
+
+```text
+'This is a tool that creates an image according to a prompt, which is a text description. It takes an input named `prompt` which contains the image description and outputs an image.'
+```
+
+The name and description make use of the keywords "image", "prompt", "create" and "generate". Using these words will most likely work better here. Let's refine our prompt a bit.
+
+```py
+agent.run("Create an image of a tree", return_code=True)
+```
+
+gives:
+```text
+==Explanation from the agent==
+I will use the following tool `image_generator` to generate an image of a tree.
+
+
+==Code generated by the agent==
+image = image_generator(prompt="tree")
+```
+
+Much better! That looks more like what we want. In short, when you notice that the agent struggles to 
+correctly map your task to the correct tools, try looking up the most pertinent keywords of the tool's name
+and description and try refining your task request with it.
+
+### Customizing the tool descriptions
+
+As we've seen before the agent has access to each of the tools' names and descriptions. The base tools 
+should have very precise names and descriptions, however, you might find that it could help to change 
+the description or name of a tool for your specific use case. This might become especially important 
+when you've added multiple tools that are very similar or if you want to use your agent only for a certain 
+domain, *e.g.* image generation and transformations.
+
+A common problem is that the agent confuses image generation with image transformation/modification when 
+used a lot for image generation tasks, *e.g.*
+```py
+agent.run("Make an image of a house and a car", return_code=True)
+```
+returns
+```text
+==Explanation from the agent== 
+I will use the following tools `image_generator` to generate an image of a house and `image_transformer` to transform the image of a car into the image of a house.
+
+==Code generated by the agent==
+house_image = image_generator(prompt="A house")
+car_image = image_generator(prompt="A car")
+house_car_image = image_transformer(image=car_image, prompt="A house")
+```
+
+which is probably not exactly what we want here. It seems like the agent has a difficult time
+understanding the difference between `image_generator` and `image_transformer` and often uses the two together.
+
+We can help the agent here by changing the tool name and description of `image_transformer`. Let's instead call it `modifier`
+to disassociate it a bit from "image" and "prompt":
+```py
+agent.toolbox["modifier"] = agent.toolbox.pop("image_transformer")
+agent.toolbox["modifier"].description = agent.toolbox["modifier"].description.replace(
+    "transforms an image according to a prompt", "modifies an image"
+)
+```
+
+Now "modify" is a strong cue to use the new image processor which should help with the above prompt. Let's run it again.
+
+```py
+agent.run("Make an image of a house and a car", return_code=True)
+```
+
+Now we're getting:
+```text
+==Explanation from the agent==
+I will use the following tools: `image_generator` to generate an image of a house, then `image_generator` to generate an image of a car.
+
+
+==Code generated by the agent==
+house_image = image_generator(prompt="A house")
+car_image = image_generator(prompt="A car")
+```
+
+which is definitely closer to what we had in mind! However, we want to have both the house and car in the same image. Steering the task more toward single image generation should help:
+
+```py
+agent.run("Create image: 'A house and car'", return_code=True)
+```
+
+```text
+==Explanation from the agent==
+I will use the following tool: `image_generator` to generate an image.
+
+
+==Code generated by the agent==
+image = image_generator(prompt="A house and car")
+```
+
+<Tip warning={true}>
+
+Agents are still brittle for many use cases, especially when it comes to 
+slightly more complex use cases like generating an image of multiple objects.
+Both the agent itself and the underlying prompt will be further improved in the coming 
+months making sure that agents become more robust to a variety of user inputs.
+
+</Tip>
+
+### Customizing the whole prompt
+
+To give the user maximum flexibility, the whole prompt template as explained [above](#structure-of-the-prompt)
+can be overwritten by the user. In this case make sure that your custom prompt includes an introduction section, 
+a tool section, an example section, and an unfinished example section. If you want to overwrite the `run` prompt template, 
+you can do as follows:
+
+```py
+template = """ [...] """
+
+agent = HfAgent(your_endpoint, run_prompt_template=template)
+```
+
+<Tip warning={true}>
+
+Please make sure to have the `<<all_tools>>` string and the `<<prompt>>` defined somewhere in the `template` so that the agent can be aware
+of the tools it has available to it as well as correctly insert the user's prompt.
+
+</Tip>
+
+Similarly, one can overwrite the `chat` prompt template. Note that the `chat` mode always uses the following format for the exchanges:
+```text
+Human: <<task>>
+
+Assistant:
+```
+
+Therefore it is important that the examples of the custom `chat` prompt template also make use of this format.
+You can overwrite the `chat` template at instantiation as follows.
+
+```python
+template = """ [...] """
+
+agent = HfAgent(url_endpoint=your_endpoint, chat_prompt_template=template)
+```
+
+<Tip warning={true}>
+
+Please make sure to have the `<<all_tools>>` string defined somewhere in the `template` so that the agent can be aware
+of the tools it has available to it.
+
+</Tip>
+
+In both cases, you can pass a repo ID instead of the prompt template if you would like to use a template hosted by someone in the community. The default prompts live in [this repo](https://huggingface.co/datasets/huggingface-tools/default-prompts) as an example.
+
+To upload your custom prompt on a repo on the Hub and share it with the community just make sure:
+- to use a dataset repository
+- to put the prompt template for the `run` command in a file named `run_prompt_template.txt`
+- to put the prompt template for the `chat` command in a file named `chat_prompt_template.txt`
+
+## Using custom tools
+
+<Tip warning={true}>
+
+Using custom tools in your local runtime means that you'll download code to run on your machine.
+
+ALWAYS inspect the tool you're downloading before loading it within your runtime, as you would do when
+installing a package using pip/npm/apt.
+
+</Tip>
+
+In this section, we'll be leveraging two existing custom tools that are specific to image generation:
+
+- We replace [huggingface-tools/image-transformation](https://huggingface.co/spaces/huggingface-tools/image-transformation),
+  with [diffusers/controlnet-canny-tool](https://huggingface.co/spaces/diffusers/controlnet-canny-tool) 
+  to allow for more image modifications.
+- We add a new tool for image upscaling to the default toolbox:
+  [diffusers/latent-upscaler-tool](https://huggingface.co/spaces/diffusers/latent-upscaler-tool).
+
+We'll start by loading the custom tools with the convenient [`load_tool`] function:
+
+```py
+from transformers import load_tool
+
+controlnet_transformer = load_tool("diffusers/controlnet-canny-tool")
+upscaler = load_tool("diffusers/latent-upscaler-tool")
+```
+
+Upon adding custom tools to an agent, the tools' descriptions and names are automatically
+included in the agents' prompts. Thus, it is imperative that custom tools have
+a well-written description and name in order for the agent to understand how to use them.
+Let's take a look at the description and name of `controlnet_transformer`:
+
+```py
+print(f"Description: '{controlnet_transformer.description}'")
+print(f"Name: '{controlnet_transformer.name}'")
+```
+
+gives 
+```text
+Description: 'This is a tool that transforms an image with ControlNet according to a prompt. 
+It takes two inputs: `image`, which should be the image to transform, and `prompt`, which should be the prompt to use to change it. It returns the modified image.'
+Name: 'image_transformer'
+```
+
+The name and description are accurate and fit the style of the [curated set of tools](./transformers_agents#a-curated-set-of-tools).
+Next, let's instantiate an agent with `controlnet_transformer` and `upscaler`:
+
+```py
+tools = [controlnet_transformer, upscaler]
+agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder", additional_tools=tools)
+```
+
+This command should give you the following info:
+
+```text
+image_transformer has been replaced by <transformers_modules.diffusers.controlnet-canny-tool.bd76182c7777eba9612fc03c0
+8718a60c0aa6312.image_transformation.ControlNetTransformationTool object at 0x7f1d3bfa3a00> as provided in `additional_tools`
+```
+
+The set of curated tools already has an `image_transformer` tool which is hereby replaced with our custom tool.
+
+<Tip>
+
+Overwriting existing tools can be beneficial if we want to use a custom tool exactly for the same task as an existing tool 
+because the agent is well-versed in using the specific task. Beware that the custom tool should follow the exact same API 
+as the overwritten tool in this case, or you should adapt the prompt template to make sure all examples using that
+tool are updated.
+
+</Tip>
+
+The upscaler tool was given the name `image_upscaler` which is not yet present in the default toolbox and is therefore simply added to the list of tools.
+You can always have a look at the toolbox that is currently available to the agent via the `agent.toolbox` attribute:
+
+```py
+print("\n".join([f"- {a}" for a in agent.toolbox.keys()]))
+```
+
+```text
+- document_qa
+- image_captioner
+- image_qa
+- image_segmenter
+- transcriber
+- summarizer
+- text_classifier
+- text_qa
+- text_reader
+- translator
+- image_transformer
+- text_downloader
+- image_generator
+- video_generator
+- image_upscaler
+```
+
+Note how `image_upscaler` is now part of the agents' toolbox.
+
+Let's now try out the new tools! We will re-use the image we generated in [Transformers Agents Quickstart](./transformers_agents#single-execution-run).
+
+```py
+from diffusers.utils import load_image
+
+image = load_image(
+    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png"
+)
+```
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" width=200> 
+
+Let's transform the image into a beautiful winter landscape:
+
+```py
+image = agent.run("Transform the image: 'A frozen lake and snowy forest'", image=image)
+```
+
+```text
+==Explanation from the agent==
+I will use the following tool: `image_transformer` to transform the image.
+
+
+==Code generated by the agent==
+image = image_transformer(image, prompt="A frozen lake and snowy forest")
+```
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes_winter.png" width=200> 
+
+The new image processing tool is based on ControlNet which can make very strong modifications to the image.
+By default the image processing tool returns an image of size 512x512 pixels. Let's see if we can upscale it.
+
+```py
+image = agent.run("Upscale the image", image)
+```
+
+```text
+==Explanation from the agent==
+I will use the following tool: `image_upscaler` to upscale the image.
+
+
+==Code generated by the agent==
+upscaled_image = image_upscaler(image)
+```
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes_winter_upscale.png" width=400> 
+
+The agent automatically mapped our prompt "Upscale the image" to the just added upscaler tool purely based on the description and name of the upscaler tool 
+and was able to correctly run it.
+
+Next, let's have a look at how you can create a new custom tool.
+
+### Adding new tools
+
+In this section, we show how to create a new tool that can be added to the agent.
+
+#### Creating a new tool
+
+We'll first start by creating a tool. We'll add the not-so-useful yet fun task of fetching the model on the Hugging Face
+Hub with the most downloads for a given task.
+
+We can do that with the following code:
+
+```python
+from huggingface_hub import list_models
+
+task = "text-classification"
+
+model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+print(model.id)
+```
+
+For the task `text-classification`, this returns `'facebook/bart-large-mnli'`, for `translation` it returns `'google-t5/t5-base'`.
+
+How do we convert this to a tool that the agent can leverage? All tools depend on the superclass `Tool` that holds the
+main attributes necessary. We'll create a class that inherits from it:
+
+```python
+from transformers import Tool
+
+
+class HFModelDownloadsTool(Tool):
+    pass
+```
+
+This class has a few needs:
+- An attribute `name`, which corresponds to the name of the tool itself. To be in tune with other tools which have a
+  performative name, we'll name it `model_download_counter`.
+- An attribute `description`, which will be used to populate the prompt of the agent.
+- `inputs` and `outputs` attributes. Defining this will help the python interpreter make educated choices about types,
+  and will allow for a gradio-demo to be spawned when we push our tool to the Hub. They're both a list of expected
+  values, which can be `text`, `image`, or `audio`.
+- A `__call__` method which contains the inference code. This is the code we've played with above!
+
+Here's what our class looks like now:
+
+```python
+from transformers import Tool
+from huggingface_hub import list_models
+
+
+class HFModelDownloadsTool(Tool):
+    name = "model_download_counter"
+    description = (
+        "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
+        "It takes the name of the category (such as text-classification, depth-estimation, etc), and "
+        "returns the name of the checkpoint."
+    )
+
+    inputs = ["text"]
+    outputs = ["text"]
+
+    def __call__(self, task: str):
+        model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+        return model.id
+```
+
+We now have our tool handy. Save it in a file and import it from your main script. Let's name this file
+`model_downloads.py`, so the resulting import code looks like this:
+
+```python
+from model_downloads import HFModelDownloadsTool
+
+tool = HFModelDownloadsTool()
+```
+
+In order to let others benefit from it and for simpler initialization, we recommend pushing it to the Hub under your 
+namespace. To do so, just call `push_to_hub` on the `tool` variable:
+
+```python
+tool.push_to_hub("hf-model-downloads")
+```
+
+You now have your code on the Hub! Let's take a look at the final step, which is to have the agent use it.
+
+#### Having the agent use the tool
+
+We now have our tool that lives on the Hub which can be instantiated as such (change the user name for your tool):
+
+```python
+from transformers import load_tool
+
+tool = load_tool("lysandre/hf-model-downloads")
+```
+
+In order to use it in the agent, simply pass it in the `additional_tools` parameter of the agent initialization method:
+
+```python
+from transformers import HfAgent
+
+agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder", additional_tools=[tool])
+
+agent.run(
+    "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
+)
+```
+which outputs the following:
+```text
+==Code generated by the agent==
+model = model_download_counter(task="text-to-video")
+print(f"The model with the most downloads is {model}.")
+audio_model = text_reader(model)
+
+
+==Result==
+The model with the most downloads is damo-vilab/text-to-video-ms-1.7b.
+```
+
+and generates the following audio.
+
+| **Audio**                                                                                                                                            |
+|------------------------------------------------------------------------------------------------------------------------------------------------------|
+| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
+
+
+<Tip>
+
+Depending on the LLM, some are quite brittle and require very exact prompts in order to work well. Having a well-defined
+name and description of the tool is paramount to having it be leveraged by the agent.
+
+</Tip>
+
+### Replacing existing tools
+
+Replacing existing tools can be done simply by assigning a new item to the agent's toolbox. Here's how one would do so:
+
+```python
+from transformers import HfAgent, load_tool
+
+agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder")
+agent.toolbox["image-transformation"] = load_tool("diffusers/controlnet-canny-tool")
+```
+
+<Tip>
+
+Beware when replacing tools with others! This will also adjust the agent's prompt. This can be good if you have a better
+prompt suited for the task, but it can also result in your tool being selected way more than others or for other
+tools to be selected instead of the one you have defined.
+
+</Tip>
+
+## Leveraging gradio-tools
+
+[gradio-tools](https://github.com/freddyaboulton/gradio-tools) is a powerful library that allows using Hugging
+Face Spaces as tools. It supports many existing Spaces as well as custom Spaces to be designed with it.
+
+We offer support for `gradio_tools` by using the `Tool.from_gradio` method. For example, we want to take
+advantage of the `StableDiffusionPromptGeneratorTool` tool offered in the `gradio-tools` toolkit so as to
+improve our prompts and generate better images.
+
+We first import the tool from `gradio_tools` and instantiate it:
+
+```python
+from gradio_tools import StableDiffusionPromptGeneratorTool
+
+gradio_tool = StableDiffusionPromptGeneratorTool()
+```
+
+We pass that instance to the `Tool.from_gradio` method:
+
+```python
+from transformers import Tool
+
+tool = Tool.from_gradio(gradio_tool)
+```
+
+Now we can manage it exactly as we would a usual custom tool. We leverage it to improve our prompt
+`a rabbit wearing a space suit`:
+
+```python
+from transformers import HfAgent
+
+agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder", additional_tools=[tool])
+
+agent.run("Generate an image of the `prompt` after improving it.", prompt="A rabbit wearing a space suit")
+```
+
+The model adequately leverages the tool:
+```text
+==Explanation from the agent==
+I will use the following  tools: `StableDiffusionPromptGenerator` to improve the prompt, then `image_generator` to generate an image according to the improved prompt.
+
+
+==Code generated by the agent==
+improved_prompt = StableDiffusionPromptGenerator(prompt)
+print(f"The improved prompt is {improved_prompt}.")
+image = image_generator(improved_prompt)
+```
+
+Before finally generating the image:
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png">
+
+<Tip warning={true}>
+
+gradio-tools requires *textual* inputs and outputs, even when working with different modalities. This implementation
+works with image and audio objects. The two are currently incompatible, but will rapidly become compatible as we
+work to improve the support.
+
+</Tip>
+
+## Future compatibility with Langchain
+
+We love Langchain and think it has a very compelling suite of tools. In order to handle these tools,
+Langchain requires *textual* inputs and outputs, even when working with different modalities.
+This is often the serialized version (i.e., saved to disk) of the objects.
+
+This difference means that multi-modality isn't handled between transformers-agents and langchain.
+We aim for this limitation to be resolved in future versions, and welcome any help from avid langchain
+users to help us achieve this compatibility.
+
+We would love to have better support. If you would like to help, please 
+[open an issue](https://github.com/huggingface/transformers/issues/new) and share what you have in mind.
--- a/docs/source/en/debugging.md
+++ b/docs/source/en/debugging.md
@ -203,7 +203,7 @@ This feature can be used with any `nn.Module`-based model.

 </Tip>

-If you start getting `loss=NaN` or the model exhibits some other abnormal behavior due to `inf` or `nan` in
+If you start getting `loss=NaN` or the model exhibits some other abnormal behavior due to `inf` or `nan` in
 activations or weights one needs to discover where the first underflow or overflow happens and what led to it. Luckily
 you can accomplish that easily by activating a special module that will do the detection automatically.

--- a/docs/source/en/deepspeed.md
+++ b/docs/source/en/deepspeed.md
@ -16,11 +16,11 @@ rendered properly in your Markdown viewer.

 # DeepSpeed

-[DeepSpeed](https://www.deepspeed.ai/) is a PyTorch optimization library that makes distributed training memory-efficient and fast. At its core is the [Zero Redundancy Optimizer (ZeRO)](https://hf.co/papers/1910.02054) which enables training large models at scale. ZeRO works in several stages:
+[DeepSpeed](https://www.deepspeed.ai/) is a PyTorch optimization library that makes distributed training memory-efficient and fast. At its core is the [Zero Redundancy Optimizer (ZeRO)](https://hf.co/papers/1910.02054) which enables training large models at scale. ZeRO works in several stages:

-* ZeRO-1, optimizer state partitioning across GPUs
+* ZeRO-1, optimizer state partitioning across GPUs
 * ZeRO-2, gradient partitioning across GPUs
-* ZeRO-3, parameter partitioning across GPUs
+* ZeRO-3, parameter partitioning across GPUs

 In GPU-limited environments, ZeRO also enables offloading optimizer memory and computation from the GPU to the CPU to fit and train really large models on a single GPU. DeepSpeed is integrated with the Transformers [`Trainer`] class for all ZeRO stages and offloading. All you need to do is provide a config file or you can use a provided template. For inference, Transformers support ZeRO-3 and offloading since it allows loading huge models.

@ -159,7 +159,7 @@ There are three types of configuration parameters:

 You could also modify the DeepSpeed configuration and edit [`TrainingArguments`] from it:

-1. Create or load a DeepSpeed configuration to use as the main configuration
+1. Create or load a DeepSpeed configuration to use as the main configuration
 2. Create a [`TrainingArguments`] object based on these DeepSpeed configuration values

 Some values, such as `scheduler.params.total_num_steps` are calculated by the [`Trainer`] during training.
@ -191,7 +191,7 @@ ZeRO-1 shards the optimizer states across GPUs, and you can expect a tiny speed
 </hfoption>
 <hfoption id="ZeRO-2">

-ZeRO-2 shards the optimizer and gradients across GPUs. This stage is primarily used for training since its features are not relevant to inference. Some important parameters to configure for better performance include:
+ZeRO-2 shards the optimizer and gradients across GPUs. This stage is primarily used for training since its features are not relevant to inference. Some important parameters to configure for better performance include:

 * `offload_optimizer` should be enabled to reduce GPU memory usage.
 * `overlap_comm` when set to `true` trades off increased GPU memory usage to lower allreduce latency. This feature uses 4.5x the `allgather_bucket_size` and `reduce_bucket_size` values. In this example, they're set to `5e8` which means it requires 9GB of GPU memory. If your GPU memory is 8GB or less, you should reduce `overlap_comm` to lower the memory requirements and prevent an out-of-memory (OOM) error.
@ -226,7 +226,7 @@ ZeRO-3 shards the optimizer, gradient, and parameters across GPUs. Unlike ZeRO-2
 * `pin_memory: true` can improve throughput, but less memory becomes available for other processes because the pinned memory is reserved for the specific process that requested it and it's typically accessed much faster than normal CPU memory.
 * `stage3_max_live_parameters` is the upper limit on how many full parameters you want to keep on the GPU at any given time. Reduce this value if you encounter an OOM error.
 * `stage3_max_reuse_distance` is a value for determining when a parameter is used again in the future, and it helps decide whether to throw the parameter away or to keep it. If the parameter is going to be reused (if the value is less than `stage3_max_reuse_distance`), then it is kept to reduce communication overhead. This is super helpful when activation checkpointing is enabled and you want to keep the parameter in the forward recompute until the backward pass. But reduce this value if you encounter an OOM error.
-* `stage3_gather_16bit_weights_on_model_save` consolidates fp16 weights when a model is saved. For large models and multiple GPUs, this is expensive in terms of memory and speed. You should enable it if you're planning on resuming training.
+* `stage3_gather_16bit_weights_on_model_save` consolidates fp16 weights when a model is saved. For large models and multiple GPUs, this is expensive in terms of memory and speed. You should enable it if you're planning on resuming training.
 * `sub_group_size` controls which parameters are updated during the optimizer step. Parameters are grouped into buckets of `sub_group_size` and each bucket is updated one at a time. When used with NVMe offload, `sub_group_size` determines when model states are moved in and out of CPU memory during the optimization step. This prevents running out of CPU memory for extremely large models. `sub_group_size` can be left to its default value if you aren't using NVMe offload, but you may want to change it if you:

    1. Run into an OOM error during the optimizer step. In this case, reduce `sub_group_size` to reduce memory usage of the temporary buffers.
--- a/Show More
+++ b/Show More