Compare commits


1 commit

Author SHA1 Message Date
6cfb1ee448 handle when from_pretrained_id is a list 2024-06-21 19:59:28 +02:00
3106 changed files with 76888 additions and 323003 deletions


@ -13,7 +13,6 @@ jobs:
check_circleci_user: check_circleci_user:
docker: docker:
- image: python:3.10-slim - image: python:3.10-slim
resource_class: small
parallelism: 1 parallelism: 1
steps: steps:
- run: echo $CIRCLE_PROJECT_USERNAME - run: echo $CIRCLE_PROJECT_USERNAME
@ -35,44 +34,64 @@ jobs:
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation - run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
- store_artifacts:
path: ~/transformers/tests_fetched_summary.txt
- run: |
if [ -f test_list.txt ]; then
cp test_list.txt test_preparation/test_list.txt
else
touch test_preparation/test_list.txt
fi
- run: |
if [ -f examples_test_list.txt ]; then
mv examples_test_list.txt test_preparation/examples_test_list.txt
else
touch test_preparation/examples_test_list.txt
fi
- run: |
if [ -f filtered_test_list_cross_tests.txt ]; then
mv filtered_test_list_cross_tests.txt test_preparation/filtered_test_list_cross_tests.txt
else
touch test_preparation/filtered_test_list_cross_tests.txt
fi
- run: |
if [ -f doctest_list.txt ]; then
cp doctest_list.txt test_preparation/doctest_list.txt
else
touch test_preparation/doctest_list.txt
fi
- run: |
if [ -f test_repo_utils.txt ]; then
mv test_repo_utils.txt test_preparation/test_repo_utils.txt
else
touch test_preparation/test_repo_utils.txt
fi
- run: python utils/tests_fetcher.py --filter_tests - run: python utils/tests_fetcher.py --filter_tests
- run: |
if [ -f test_list.txt ]; then
mv test_list.txt test_preparation/filtered_test_list.txt
else
touch test_preparation/filtered_test_list.txt
fi
- store_artifacts:
path: test_preparation/test_list.txt
- store_artifacts:
path: test_preparation/doctest_list.txt
- store_artifacts:
path: ~/transformers/test_preparation/filtered_test_list.txt
- store_artifacts:
path: test_preparation/examples_test_list.txt
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: | - run: |
if [ ! -s test_preparation/generated_config.yml ]; then if [ ! -s test_preparation/generated_config.yml ]; then
echo "No tests to run, exiting early!" echo "No tests to run, exiting early!"
circleci-agent step halt circleci-agent step halt
fi fi
- store_artifacts: - store_artifacts:
path: test_preparation path: test_preparation/generated_config.yml
- run:
name: "Retrieve Artifact Paths"
# [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
# `CIRCLE_TOKEN` is defined as an environment variable set within a context, see `https://circleci.com/docs/contexts/`
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To avoid a too-long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Being explicit is good, but here it became a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts: - store_artifacts:
path: test_preparation/transformed_artifacts.json path: test_preparation/filtered_test_list_cross_tests.txt
- store_artifacts:
path: test_preparation/artifacts.json
- continuation/continue: - continuation/continue:
parameters: test_preparation/transformed_artifacts.json
configuration_path: test_preparation/generated_config.yml configuration_path: test_preparation/generated_config.yml
# To run all tests for the nightly build # To run all tests for the nightly build
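
Note: the "Retrieve Artifact Paths" and "Prepare pipeline parameters" steps above fetch the fetch_tests job's artifact listing from the CircleCI v2 API and turn it into per-job pipeline parameters, so that generated_config.yml stays small enough for the continuation orb. Below is a minimal sketch of the kind of transformation `utils/process_test_artifacts.py` could perform, assuming the standard `items`/`path`/`url` fields of the artifacts API; the parameter names mirror the `<job_name>_test_list` parameters declared in `create_circleci_config.py`, but the details are illustrative, not the script's actual implementation.

```python
import json

# Sketch (assumed behaviour, not the actual utils/process_test_artifacts.py):
# map every "<job_name>_test_list.txt" artifact to a "<job_name>_test_list"
# pipeline parameter holding its download URL, so the continuation orb gets
# short URLs instead of the full test lists.
with open("test_preparation/artifacts.json") as f:
    artifacts = json.load(f)["items"]

parameters = {}
for artifact in artifacts:
    path = artifact["path"]  # e.g. "test_preparation/tests_torch_test_list.txt"
    if path.endswith("_test_list.txt"):
        job_name = path.split("/")[-1].removesuffix("_test_list.txt")
        parameters[f"{job_name}_test_list"] = artifact["url"]

with open("test_preparation/transformed_artifacts.json", "w") as f:
    json.dump(parameters, f, indent=2)
```

The resulting transformed_artifacts.json is what the `continuation/continue` step hands over through its `parameters:` field; each generated job later downloads its own list with the curl step defined in `create_circleci_config.py`.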
@ -83,49 +102,22 @@ jobs:
parallelism: 1 parallelism: 1
steps: steps:
- checkout - checkout
- run: uv pip install -U -e . - run: uv pip install -e .
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
- run: python utils/tests_fetcher.py --filter_tests
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: | - run: |
if [ ! -s test_preparation/generated_config.yml ]; then mkdir test_preparation
echo "No tests to run, exiting early!" echo -n "tests" > test_preparation/test_list.txt
circleci-agent step halt echo -n "all" > test_preparation/examples_test_list.txt
fi echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt
- run: |
echo -n "tests" > test_list.txt
python utils/tests_fetcher.py --filter_tests
mv test_list.txt test_preparation/filtered_test_list.txt
- run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
- store_artifacts: - store_artifacts:
path: test_preparation path: test_preparation/generated_config.txt
- run:
name: "Retrieve Artifact Paths"
env:
CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url}
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To avoid a too-long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Being explicit is good, but here it became a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
path: test_preparation/artifacts.json
- continuation/continue: - continuation/continue:
parameters: test_preparation/transformed_artifacts.json configuration_path: test_preparation/generated_config.yml
configuration_path: test_preparation/generated_config.yml
check_code_quality: check_code_quality:
working_directory: ~/transformers working_directory: ~/transformers
@ -138,7 +130,7 @@ jobs:
parallelism: 1 parallelism: 1
steps: steps:
- checkout - checkout
- run: uv pip install -e ".[quality]" - run: uv pip install -e .
- run: - run:
name: Show installed libraries and their versions name: Show installed libraries and their versions
command: pip freeze | tee installed.txt command: pip freeze | tee installed.txt
@ -150,7 +142,6 @@ jobs:
- run: python utils/custom_init_isort.py --check_only - run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only - run: python utils/sort_auto_mappings.py --check_only
- run: python utils/check_doc_toc.py - run: python utils/check_doc_toc.py
- run: python utils/check_docstrings.py --check_all
check_repository_consistency: check_repository_consistency:
working_directory: ~/transformers working_directory: ~/transformers
@ -163,14 +154,13 @@ jobs:
parallelism: 1 parallelism: 1
steps: steps:
- checkout - checkout
- run: uv pip install -e ".[quality]" - run: uv pip install -e .
- run: - run:
name: Show installed libraries and their versions name: Show installed libraries and their versions
command: pip freeze | tee installed.txt command: pip freeze | tee installed.txt
- store_artifacts: - store_artifacts:
path: ~/transformers/installed.txt path: ~/transformers/installed.txt
- run: python utils/check_copies.py - run: python utils/check_copies.py
- run: python utils/check_modular_conversion.py
- run: python utils/check_table.py - run: python utils/check_table.py
- run: python utils/check_dummies.py - run: python utils/check_dummies.py
- run: python utils/check_repo.py - run: python utils/check_repo.py
@ -187,32 +177,17 @@ workflows:
version: 2 version: 2
setup_and_quality: setup_and_quality:
when: when:
and: not: <<pipeline.parameters.nightly>>
- equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
- not: <<pipeline.parameters.nightly>>
jobs: jobs:
- check_circleci_user - check_circleci_user
- check_code_quality - check_code_quality
- check_repository_consistency - check_repository_consistency
- fetch_tests - fetch_tests
setup_and_quality_2:
when:
not:
equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests:
# [reference] https://circleci.com/docs/contexts/
context:
- TRANSFORMERS_CONTEXT
nightly: nightly:
when: <<pipeline.parameters.nightly>> when: <<pipeline.parameters.nightly>>
jobs: jobs:
- check_circleci_user - check_circleci_user
- check_code_quality - check_code_quality
- check_repository_consistency - check_repository_consistency
- fetch_all_tests - fetch_all_tests


@ -32,7 +32,7 @@ COMMON_ENV_VARIABLES = {
"RUN_PT_FLAX_CROSS_TESTS": False, "RUN_PT_FLAX_CROSS_TESTS": False,
} }
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None} COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "v": None}
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}] DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
@ -40,23 +40,9 @@ class EmptyJob:
job_name = "empty" job_name = "empty"
def to_dict(self): def to_dict(self):
steps = [{"run": 'ls -la'}]
if self.job_name == "collection_job":
steps.extend(
[
"checkout",
{"run": "pip install requests || true"},
{"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
{"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
{"store_artifacts": {"path": "outputs"}},
{"run": 'echo "All required jobs have now completed"'},
]
)
return { return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE), "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"resource_class": "small", "steps":["checkout"],
"steps": steps,
} }
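
Note: the `collection_job` branch added to `EmptyJob.to_dict()` above is a watchdog: it polls the CircleCI v2 workflow API every five seconds until no job other than `collection_job` is still running, then aggregates the per-job reports. A rough Python equivalent of that `curl`/`jq` loop, assuming `requests` is installed and `CIRCLE_WORKFLOW_ID`/`CCI_TOKEN` are set as in the step above:

```python
import os
import time

import requests

# Mirrors: while [[ $(curl ... | jq ... | grep -c "running") -gt 0 ]]; do sleep 5; done
url = f"https://circleci.com/api/v2/workflow/{os.environ['CIRCLE_WORKFLOW_ID']}/job"
headers = {"Circle-Token": os.environ["CCI_TOKEN"]}

while True:
    items = requests.get(url, headers=headers).json().get("items", [])
    still_running = [job for job in items if job["name"] != "collection_job" and job["status"] == "running"]
    if not still_running:
        break
    time.sleep(5)

print("All required jobs have now completed")
```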
@ -64,15 +50,16 @@ class EmptyJob:
class CircleCIJob: class CircleCIJob:
name: str name: str
additional_env: Dict[str, Any] = None additional_env: Dict[str, Any] = None
cache_name: str = None
cache_version: str = "0.8.2"
docker_image: List[Dict[str, str]] = None docker_image: List[Dict[str, str]] = None
install_steps: List[str] = None install_steps: List[str] = None
marker: Optional[str] = None marker: Optional[str] = None
parallelism: Optional[int] = 0 parallelism: Optional[int] = 1
pytest_num_workers: int = 8 pytest_num_workers: int = 12
pytest_options: Dict[str, Any] = None pytest_options: Dict[str, Any] = None
resource_class: Optional[str] = "xlarge" resource_class: Optional[str] = "2xlarge"
tests_to_run: Optional[List[str]] = None tests_to_run: Optional[List[str]] = None
num_test_files_per_worker: Optional[int] = 10
# This should be only used for doctest job! # This should be only used for doctest job!
command_timeout: Optional[int] = None command_timeout: Optional[int] = None
@ -80,6 +67,8 @@ class CircleCIJob:
# Deal with defaults for mutable attributes. # Deal with defaults for mutable attributes.
if self.additional_env is None: if self.additional_env is None:
self.additional_env = {} self.additional_env = {}
if self.cache_name is None:
self.cache_name = self.name
if self.docker_image is None: if self.docker_image is None:
# Let's avoid changing the default list and make a copy. # Let's avoid changing the default list and make a copy.
self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE) self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
@ -90,96 +79,155 @@ class CircleCIJob:
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
print(f"Using {self.docker_image} docker image") print(f"Using {self.docker_image} docker image")
if self.install_steps is None: if self.install_steps is None:
self.install_steps = ["uv venv && uv pip install ."] self.install_steps = []
if self.pytest_options is None: if self.pytest_options is None:
self.pytest_options = {} self.pytest_options = {}
if isinstance(self.tests_to_run, str): if isinstance(self.tests_to_run, str):
self.tests_to_run = [self.tests_to_run] self.tests_to_run = [self.tests_to_run]
else: if self.parallelism is None:
test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt") self.parallelism = 1
print("Looking for ", test_file)
if os.path.exists(test_file):
with open(test_file) as f:
expanded_tests = f.read().strip().split("\n")
self.tests_to_run = expanded_tests
print("Found:", expanded_tests)
else:
self.tests_to_run = []
print("not Found")
def to_dict(self): def to_dict(self):
env = COMMON_ENV_VARIABLES.copy() env = COMMON_ENV_VARIABLES.copy()
env.update(self.additional_env) env.update(self.additional_env)
cache_branch_prefix = os.environ.get("CIRCLE_BRANCH", "pull")
if cache_branch_prefix != "main":
cache_branch_prefix = "pull"
job = { job = {
"docker": self.docker_image, "docker": self.docker_image,
"environment": env, "environment": env,
} }
if self.resource_class is not None: if self.resource_class is not None:
job["resource_class"] = self.resource_class job["resource_class"] = self.resource_class
if self.parallelism is not None:
job["parallelism"] = self.parallelism
steps = [
"checkout",
{"attach_workspace": {"at": "test_preparation"}},
]
steps.extend([{"run": l} for l in self.install_steps])
steps.append({"run": {"name": "Show installed libraries and their size", "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}})
steps.append({"run": {"name": "Show installed libraries and their versions", "command": """pip list --format=freeze | tee installed.txt || true"""}})
steps.append({"run":{"name":"Show biggest libraries","command":"""dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}})
steps.append({"store_artifacts": {"path": "installed.txt"}})
all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options} all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()] pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
pytest_flags.append( pytest_flags.append(
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
) )
# Examples special case: we need to download NLTK files in advance to avoid concurrency issues
timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else "" steps.append({"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}})
marker_cmd = f"-m '{self.marker}'" if self.marker is not None else "" test_command = ""
additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" if self.command_timeout:
parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> ' test_command = f"timeout {self.command_timeout} "
steps = [ # junit family xunit1 is necessary to support splitting on test name or class name with circleci split
"checkout", test_command += f"python3 -m pytest -rsfE -p no:warnings -o junit_family=xunit1 --tb=short --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)
{"attach_workspace": {"at": "test_preparation"}},
{"run": "apt-get update && apt-get install -y curl"}, if self.parallelism == 1:
{"run": " && ".join(self.install_steps)}, if self.tests_to_run is None:
{"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"}, test_command += " << pipeline.parameters.tests_to_run >>"
{"run": { else:
"name": "Show installed libraries and their size", test_command += " " + " ".join(self.tests_to_run)
"command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""} else:
}, # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
{"run": { tests = self.tests_to_run
"name": "Show installed libraries and their versions", if tests is None:
"command": """pip list --format=freeze | tee installed.txt || true"""} folder = os.environ["test_preparation_dir"]
}, test_file = os.path.join(folder, "filtered_test_list.txt")
{"run": { if os.path.exists(test_file): # We take this job's tests from the filtered test_list.txt
"name": "Show biggest libraries", with open(test_file) as f:
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""} tests = f.read().split(" ")
},
{"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}, # expand the test list
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}}, if tests == ["tests"]:
{"run": {"name": "Split tests across parallel nodes: show current parallel tests", tests = [os.path.join("tests", x) for x in os.listdir("tests")]
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" expanded_tests = []
} for test in tests:
}, if test.endswith(".py"):
{"run": { expanded_tests.append(test)
"name": "Run tests", elif test == "tests/models":
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} if "tokenization" in self.name:
}, expanded_tests.extend(glob.glob("tests/models/**/test_tokenization*.py", recursive=True))
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, elif self.name in ["flax","torch","tf"]:
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, name = self.name if self.name != "torch" else ""
{"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, if self.name == "torch":
{"store_test_results": {"path": "test-results"}}, all_tests = glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True)
{"store_artifacts": {"path": "test-results/junit.xml"}}, filtered = [k for k in all_tests if ("_tf_") not in k and "_flax_" not in k]
{"store_artifacts": {"path": "reports"}}, expanded_tests.extend(filtered)
{"store_artifacts": {"path": "tests.txt"}}, else:
{"store_artifacts": {"path": "splitted_tests.txt"}}, expanded_tests.extend(glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True))
{"store_artifacts": {"path": "installed.txt"}}, else:
] expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True))
if self.parallelism: elif test == "tests/pipelines":
job["parallelism"] = parallel expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True))
else:
expanded_tests.append(test)
tests = " ".join(expanded_tests)
# Each executor to run ~10 tests
n_executors = max(len(expanded_tests) // 10, 1)
# Avoid empty test list on some executor(s) or launching too many executors
if n_executors > self.parallelism:
n_executors = self.parallelism
job["parallelism"] = n_executors
# Need to be newline separated for the command `circleci tests split` below
command = f'echo {tests} | tr " " "\\n" >> tests.txt'
steps.append({"run": {"name": "Get tests", "command": command}})
command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
steps.append({"run": {"name": "Split tests", "command": command}})
steps.append({"store_artifacts": {"path": "tests.txt"}})
steps.append({"store_artifacts": {"path": "splitted_tests.txt"}})
test_command = ""
if self.command_timeout:
test_command = f"timeout {self.command_timeout} "
test_command += f"python3 -m pytest -rsfE -p no:warnings --tb=short -o junit_family=xunit1 --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += " $(cat splitted_tests.txt)"
if self.marker is not None:
test_command += f" -m {self.marker}"
if self.name == "pr_documentation_tests":
# can't use ` | tee tests_output.txt` as usual
test_command += " > tests_output.txt"
# Save the return code, so we can check if it is timeout in the next step.
test_command += '; touch "$?".txt'
# Never fail the test step for the doctest job. We will check the results in the next step, and fail that
# step instead if the actual test failures are found. This is to avoid the timeout being reported as test
# failure.
test_command = f"({test_command}) || true"
else:
test_command = f"({test_command} | tee tests_output.txt)"
steps.append({"run": {"name": "Run tests", "command": test_command}})
steps.append({"run": {"name": "Skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}})
steps.append({"run": {"name": "Failed tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}})
steps.append({"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}})
steps.append({"store_test_results": {"path": "test-results"}})
steps.append({"store_artifacts": {"path": "tests_output.txt"}})
steps.append({"store_artifacts": {"path": "test-results/junit.xml"}})
steps.append({"store_artifacts": {"path": "reports"}})
job["steps"] = steps job["steps"] = steps
return job return job
@property @property
def job_name(self): def job_name(self):
return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}" return self.name if "examples" in self.name else f"tests_{self.name}"
# JOBS # JOBS
torch_and_tf_job = CircleCIJob( torch_and_tf_job = CircleCIJob(
"torch_and_tf", "torch_and_tf",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=["uv venv && uv pip install ."],
additional_env={"RUN_PT_TF_CROSS_TESTS": True}, additional_env={"RUN_PT_TF_CROSS_TESTS": True},
marker="is_pt_tf_cross_test", marker="is_pt_tf_cross_test",
pytest_options={"rA": None, "durations": 0}, pytest_options={"rA": None, "durations": 0},
@ -190,6 +238,7 @@ torch_and_flax_job = CircleCIJob(
"torch_and_flax", "torch_and_flax",
additional_env={"RUN_PT_FLAX_CROSS_TESTS": True}, additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-jax-light"}], docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
install_steps=["uv venv && uv pip install ."],
marker="is_pt_flax_cross_test", marker="is_pt_flax_cross_test",
pytest_options={"rA": None, "durations": 0}, pytest_options={"rA": None, "durations": 0},
) )
@ -197,42 +246,35 @@ torch_and_flax_job = CircleCIJob(
torch_job = CircleCIJob( torch_job = CircleCIJob(
"torch", "torch",
docker_image=[{"image": "huggingface/transformers-torch-light"}], docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate", install_steps=["uv venv && uv pip install ."],
parallelism=6,
)
generate_job = CircleCIJob(
"generate",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="generate",
parallelism=6, parallelism=6,
pytest_num_workers=16
) )
tokenization_job = CircleCIJob( tokenization_job = CircleCIJob(
"tokenization", "tokenization",
docker_image=[{"image": "huggingface/transformers-torch-light"}], docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8, install_steps=["uv venv && uv pip install ."],
parallelism=6,
pytest_num_workers=16
) )
processor_job = CircleCIJob(
"processors",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
)
tf_job = CircleCIJob( tf_job = CircleCIJob(
"tf", "tf",
docker_image=[{"image":"huggingface/transformers-tf-light"}], docker_image=[{"image":"huggingface/transformers-tf-light"}],
install_steps=["uv venv", "uv pip install -e."],
parallelism=6, parallelism=6,
pytest_num_workers=16,
) )
flax_job = CircleCIJob( flax_job = CircleCIJob(
"flax", "flax",
docker_image=[{"image":"huggingface/transformers-jax-light"}], docker_image=[{"image":"huggingface/transformers-jax-light"}],
install_steps=["uv venv && uv pip install ."],
parallelism=6, parallelism=6,
pytest_num_workers=16, pytest_num_workers=16
resource_class="2xlarge",
) )
@ -240,8 +282,8 @@ pipelines_torch_job = CircleCIJob(
"pipelines_torch", "pipelines_torch",
additional_env={"RUN_PIPELINE_TESTS": True}, additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}], docker_image=[{"image":"huggingface/transformers-torch-light"}],
install_steps=["uv venv && uv pip install ."],
marker="is_pipeline_test", marker="is_pipeline_test",
parallelism=4,
) )
@ -249,8 +291,8 @@ pipelines_tf_job = CircleCIJob(
"pipelines_tf", "pipelines_tf",
additional_env={"RUN_PIPELINE_TESTS": True}, additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-tf-light"}], docker_image=[{"image":"huggingface/transformers-tf-light"}],
install_steps=["uv venv && uv pip install ."],
marker="is_pipeline_test", marker="is_pipeline_test",
parallelism=4,
) )
@ -258,22 +300,34 @@ custom_tokenizers_job = CircleCIJob(
"custom_tokenizers", "custom_tokenizers",
additional_env={"RUN_CUSTOM_TOKENIZERS": True}, additional_env={"RUN_CUSTOM_TOKENIZERS": True},
docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}], docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
install_steps=["uv venv","uv pip install -e ."],
parallelism=None,
resource_class=None,
tests_to_run=[
"./tests/models/bert_japanese/test_tokenization_bert_japanese.py",
"./tests/models/openai/test_tokenization_openai.py",
"./tests/models/clip/test_tokenization_clip.py",
],
) )
examples_torch_job = CircleCIJob( examples_torch_job = CircleCIJob(
"examples_torch", "examples_torch",
additional_env={"OMP_NUM_THREADS": 8}, additional_env={"OMP_NUM_THREADS": 8},
cache_name="torch_examples",
docker_image=[{"image":"huggingface/transformers-examples-torch"}], docker_image=[{"image":"huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build # TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"], install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=1,
) )
examples_tensorflow_job = CircleCIJob( examples_tensorflow_job = CircleCIJob(
"examples_tensorflow", "examples_tensorflow",
additional_env={"OMP_NUM_THREADS": 8}, cache_name="tensorflow_examples",
docker_image=[{"image":"huggingface/transformers-examples-tf"}], docker_image=[{"image":"huggingface/transformers-examples-tf"}],
install_steps=["uv venv && uv pip install . && uv pip install -r examples/tensorflow/_tests_requirements.txt"],
parallelism=8
) )
@ -282,13 +336,12 @@ hub_job = CircleCIJob(
additional_env={"HUGGINGFACE_CO_STAGING": True}, additional_env={"HUGGINGFACE_CO_STAGING": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}], docker_image=[{"image":"huggingface/transformers-torch-light"}],
install_steps=[ install_steps=[
'uv venv && uv pip install .', "uv venv && uv pip install .",
'git config --global user.email "ci@dummy.com"', 'git config --global user.email "ci@dummy.com"',
'git config --global user.name "ci"', 'git config --global user.name "ci"',
], ],
marker="is_staging_test", marker="is_staging_test",
pytest_num_workers=2, pytest_num_workers=1,
resource_class="medium",
) )
@ -296,18 +349,27 @@ onnx_job = CircleCIJob(
"onnx", "onnx",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=[ install_steps=[
"uv venv", "uv venv && uv pip install .",
"uv pip install --upgrade eager pip",
"uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]", "uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
], ],
pytest_options={"k onnx": None}, pytest_options={"k onnx": None},
pytest_num_workers=1, pytest_num_workers=1,
resource_class="small",
) )
exotic_models_job = CircleCIJob( exotic_models_job = CircleCIJob(
"exotic_models", "exotic_models",
install_steps=["uv venv && uv pip install ."],
docker_image=[{"image":"huggingface/transformers-exotic-models"}], docker_image=[{"image":"huggingface/transformers-exotic-models"}],
tests_to_run=[
"tests/models/*layoutlmv*",
"tests/models/*nat",
"tests/models/deta",
"tests/models/udop",
"tests/models/nougat",
],
pytest_num_workers=12,
parallelism=4, parallelism=4,
pytest_options={"durations": 100}, pytest_options={"durations": 100},
) )
@ -316,16 +378,11 @@ exotic_models_job = CircleCIJob(
repo_utils_job = CircleCIJob( repo_utils_job = CircleCIJob(
"repo_utils", "repo_utils",
docker_image=[{"image":"huggingface/transformers-consistency"}], docker_image=[{"image":"huggingface/transformers-consistency"}],
pytest_num_workers=4, install_steps=["uv venv && uv pip install ."],
parallelism=None,
pytest_num_workers=1,
resource_class="large", resource_class="large",
) tests_to_run="tests/repo_utils",
non_model_job = CircleCIJob(
"non_model",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
) )
@ -334,18 +391,28 @@ non_model_job = CircleCIJob(
# the bash output redirection.) # the bash output redirection.)
py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)' py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
py_command = f"$(python3 -c '{py_command}')" py_command = f"$(python3 -c '{py_command}')"
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt' command = f'echo "{py_command}" > pr_documentation_tests_temp.txt'
doc_test_job = CircleCIJob( doc_test_job = CircleCIJob(
"pr_documentation_tests", "pr_documentation_tests",
docker_image=[{"image":"huggingface/transformers-consistency"}], docker_image=[{"image":"huggingface/transformers-consistency"}],
additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"}, additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
install_steps=[ install_steps=[
# Add an empty file to keep the test step running correctly even no file is selected to be tested. # Add an empty file to keep the test step running correctly even no file is selected to be tested.
"uv venv && pip install .",
"touch dummy.py", "touch dummy.py",
command, {
"cat pr_documentation_tests_temp.txt", "name": "Get files to test",
"tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt" "command": command,
},
{
"name": "Show information in `Get files to test`",
"command":
"cat pr_documentation_tests_temp.txt"
},
{
"name": "Get the last line in `pr_documentation_tests.txt`",
"command":
"tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests.txt"
},
], ],
tests_to_run="$(cat pr_documentation_tests.txt)", # noqa tests_to_run="$(cat pr_documentation_tests.txt)", # noqa
pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None}, pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
@ -353,49 +420,121 @@ doc_test_job = CircleCIJob(
pytest_num_workers=1, pytest_num_workers=1,
) )
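
Note: the `pytest_flags` list comprehension in `CircleCIJob.to_dict()` turns these option dicts into CLI flags: entries with a value become `--key=value`, while `None`-valued entries become short `-key` flags (the leading dash in `"-doctest-modules"` is what yields the long `--doctest-modules` form). A small worked example using option dicts that appear in this file:

```python
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "v": None}
doc_test_options = {"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None}

def build_flags(options):
    # Same expression as in CircleCIJob.to_dict()
    return [
        f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}"
        for key, value in options.items()
    ]

print(build_flags(COMMON_PYTEST_OPTIONS))
# ['--max-worker-restart=0', '--dist=loadfile', '-v']
print(build_flags({**COMMON_PYTEST_OPTIONS, **doc_test_options}))
# ['--max-worker-restart=0', '--dist=loadfile', '-v', '--doctest-modules', '--doctest-glob=*.md', '-rvsA']
```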
REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip REGULAR_TESTS = [
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job] torch_and_tf_job,
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job] torch_and_flax_job,
torch_job,
tf_job,
flax_job,
custom_tokenizers_job,
hub_job,
onnx_job,
exotic_models_job,
tokenization_job
]
EXAMPLES_TESTS = [
examples_torch_job,
examples_tensorflow_job,
]
PIPELINE_TESTS = [
pipelines_torch_job,
pipelines_tf_job,
]
REPO_UTIL_TESTS = [repo_utils_job] REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job] DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
def create_circleci_config(folder=None): def create_circleci_config(folder=None):
if folder is None: if folder is None:
folder = os.getcwd() folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder os.environ["test_preparation_dir"] = folder
jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )] jobs = []
print("The following jobs will be run ", jobs) all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file):
with open(all_test_file) as f:
all_test_list = f.read()
else:
all_test_list = []
if len(all_test_list) > 0:
jobs.extend(PIPELINE_TESTS)
test_file = os.path.join(folder, "filtered_test_list.txt")
if os.path.exists(test_file):
with open(test_file) as f:
test_list = f.read()
else:
test_list = []
if len(test_list) > 0:
jobs.extend(REGULAR_TESTS)
extended_tests_to_run = set(test_list.split())
# Extend the test files for cross test jobs
for job in jobs:
if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]:
for test_path in copy.copy(extended_tests_to_run):
dir_path, fn = os.path.split(test_path)
if fn.startswith("test_modeling_tf_"):
fn = fn.replace("test_modeling_tf_", "test_modeling_")
elif fn.startswith("test_modeling_flax_"):
fn = fn.replace("test_modeling_flax_", "test_modeling_")
else:
if job.job_name == "test_torch_and_tf":
fn = fn.replace("test_modeling_", "test_modeling_tf_")
elif job.job_name == "test_torch_and_flax":
fn = fn.replace("test_modeling_", "test_modeling_flax_")
new_test_file = str(os.path.join(dir_path, fn))
if os.path.isfile(new_test_file):
if new_test_file not in extended_tests_to_run:
extended_tests_to_run.add(new_test_file)
extended_tests_to_run = sorted(extended_tests_to_run)
for job in jobs:
if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]:
job.tests_to_run = extended_tests_to_run
fn = "filtered_test_list_cross_tests.txt"
f_path = os.path.join(folder, fn)
with open(f_path, "w") as fp:
fp.write(" ".join(extended_tests_to_run))
example_file = os.path.join(folder, "examples_test_list.txt")
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
with open(example_file, "r", encoding="utf-8") as f:
example_tests = f.read()
for job in EXAMPLES_TESTS:
framework = job.name.replace("examples_", "").replace("torch", "pytorch")
if example_tests == "all":
job.tests_to_run = [f"examples/{framework}"]
else:
job.tests_to_run = [f for f in example_tests.split(" ") if f.startswith(f"examples/{framework}")]
if len(job.tests_to_run) > 0:
jobs.append(job)
doctest_file = os.path.join(folder, "doctest_list.txt")
if os.path.exists(doctest_file):
with open(doctest_file) as f:
doctest_list = f.read()
else:
doctest_list = []
if len(doctest_list) > 0:
jobs.extend(DOC_TESTS)
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
jobs.extend(REPO_UTIL_TESTS)
if len(jobs) == 0: if len(jobs) == 0:
jobs = [EmptyJob()] jobs = [EmptyJob()]
else: config = {"version": "2.1"}
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}) config["parameters"] = {
# Add a job waiting all the test jobs and aggregate their test summary files at the end # Only used to accept the parameters from the trigger
collection_job = EmptyJob() "nightly": {"type": "boolean", "default": False},
collection_job.job_name = "collection_job" "tests_to_run": {"type": "string", "default": test_list},
jobs = [collection_job] + jobs
config = {
"version": "2.1",
"parameters": {
# Only used to accept the parameters from the trigger
"nightly": {"type": "boolean", "default": False},
"tests_to_run": {"type": "string", "default": ''},
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
},
"jobs": {j.job_name: j.to_dict() for j in jobs}
} }
if "CIRCLE_TOKEN" in os.environ: config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
# For private forked repo. (e.g. new model addition) config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
else:
# For public repo. (e.g. `transformers`)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f: with open(os.path.join(folder, "generated_config.yml"), "w") as f:
f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>")) f.write(yaml.dump(config, indent=2, width=1000000, sort_keys=False))
if __name__ == "__main__": if __name__ == "__main__":
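
Note: in both code paths above, `create_circleci_config()` writes a dict to `test_preparation/generated_config.yml`, which the `continuation/continue` step in `.circleci/config.yml` then executes. In the newer branch of the code, the emitted structure is roughly as follows (the job selection here is hypothetical; the real list depends on which `<job_name>_test_list.txt` files the test fetcher produced):

```python
import yaml

jobs = ["tests_torch", "tests_tokenization"]  # illustrative selection only

config = {
    "version": "2.1",
    "parameters": {
        # Only used to accept the parameters from the trigger
        "nightly": {"type": "boolean", "default": False},
        "tests_to_run": {"type": "string", "default": ""},
        **{f"{job}_test_list": {"type": "string", "default": ""} for job in jobs},
        **{f"{job}_parallelism": {"type": "integer", "default": 1} for job in jobs},
    },
    # In the real script each value is CircleCIJob.to_dict(); a placeholder is used here.
    "jobs": {job: {"steps": ["checkout"]} for job in jobs},
    "workflows": {"version": 2, "run_tests": {"jobs": jobs}},
}

print(yaml.dump(config, sort_keys=False, default_flow_style=False))
```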


@ -67,4 +67,4 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
main() main()

.coveragerc (new file, 12 lines)

@ -0,0 +1,12 @@
[run]
source=transformers
omit =
# skip conversion scripts from testing for now
*/convert_*
*/__main__.py
[report]
exclude_lines =
pragma: no cover
raise
except
register_parameter
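
Note: the new `.coveragerc` limits coverage collection to the `transformers` package, skips the conversion scripts, and strips defensive lines (`raise`, `except`, `register_parameter`) from the report. A minimal sketch of exercising it from Python, assuming the `coverage` and `pytest` packages are installed and using an illustrative test path:

```python
import coverage
import pytest

# Picks up the [run] and [report] sections from the .coveragerc shown above.
cov = coverage.Coverage(config_file=".coveragerc")
cov.start()
pytest.main(["tests/utils", "-q"])  # illustrative test selection
cov.stop()
cov.save()
cov.report()
```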


@ -1,17 +1,6 @@
name: "\U0001F41B Bug Report" name: "\U0001F41B Bug Report"
description: Submit a bug report to help us improve transformers description: Submit a bug report to help us improve transformers
labels: [ "bug" ]
body: body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report! 🤗
Before you submit your bug report:
- If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug)
- Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue
- type: textarea - type: textarea
id: system-info id: system-info
attributes: attributes:
@ -36,26 +25,26 @@ body:
Models: Models:
- text models: @ArthurZucker - text models: @ArthurZucker and @younesbelkada
- vision models: @amyeroberts, @qubvel - vision models: @amyeroberts
- speech models: @ylacombe, @eustlb - speech models: @sanchit-gandhi
- graph models: @clefourrier - graph models: @clefourrier
Library: Library:
- flax: @sanchit-gandhi - flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others) - generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Rocketknight1 - pipelines: @Narsil
- tensorflow: @gante and @Rocketknight1 - tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker and @itazap - tokenizers: @ArthurZucker
- trainer: @muellerzr @SunMarc - trainer: @muellerzr @SunMarc
Integrations: Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr - deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam - ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc - Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada
Documentation: @stevhliu Documentation: @stevhliu


@ -34,7 +34,7 @@ Some notes:
## Tutorial section ## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md) - [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md) - [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md)
- [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md) - [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md) - [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md) - [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)


@ -39,29 +39,28 @@ members/contributors who may be interested in your PR.
Models: Models:
- text models: @ArthurZucker - text models: @ArthurZucker and @younesbelkada
- vision models: @amyeroberts, @qubvel - vision models: @amyeroberts
- speech models: @ylacombe, @eustlb - speech models: @sanchit-gandhi
- graph models: @clefourrier - graph models: @clefourrier
Library: Library:
- flax: @sanchit-gandhi - flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others) - generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Rocketknight1 - pipelines: @Narsil
- tensorflow: @gante and @Rocketknight1 - tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker - tokenizers: @ArthurZucker
- trainer: @muellerzr and @SunMarc - trainer: @muellerzr and @SunMarc
- chat templates: @Rocketknight1
Integrations: Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr - deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam - ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc - Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada
Documentation: @stevhliu Documentation: @stevhliu and @MKhalusova
HF projects: HF projects:


@ -23,7 +23,7 @@ jobs:
sudo apt -y update && sudo apt install -y libsndfile1-dev sudo apt -y update && sudo apt install -y libsndfile1-dev
- name: Load cached virtual environment - name: Load cached virtual environment
uses: actions/cache@v4 uses: actions/cache@v2
id: cache id: cache
with: with:
path: ~/venv/ path: ~/venv/


@ -1,75 +1,42 @@
name: Self-hosted runner (benchmark) name: Self-hosted runner (benchmark)
on: on:
push: schedule:
branches: [main] - cron: "17 2 * * *"
pull_request: workflow_call:
types: [ opened, labeled, reopened, synchronize ]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env: env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
TF_FORCE_GPU_ALLOW_GROWTH: true
jobs: jobs:
benchmark: benchmark:
name: Benchmark name: Benchmark
strategy: runs-on: [single-gpu, nvidia-gpu, a10, ci]
matrix:
# group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
group: [aws-g5-4xlarge-cache]
runs-on:
group: ${{ matrix.group }}
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
container: container:
image: huggingface/transformers-pytorch-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: Get repo - name: Update clone
uses: actions/checkout@v4 working-directory: /transformers
with:
ref: ${{ github.event.pull_request.head.sha || github.sha }}
- name: Install libpq-dev & psql
run: | run: |
apt update git fetch && git checkout ${{ github.sha }}
apt install -y libpq-dev postgresql-client
- name: Install benchmark script dependencies
run: python3 -m pip install -r benchmark/requirements.txt
- name: Reinstall transformers in edit mode (remove the one installed during docker image build) - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]" run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Run database init script - name: Benchmark (daily)
if: github.event_name == 'schedule'
working-directory: /transformers
run: | run: |
psql -f benchmark/init_db.sql python3 -m pip install optimum-benchmark>=0.2.0
env: HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
PGDATABASE: metrics
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
- name: Run benchmark - name: Benchmark (merged to main event)
if: github.event_name == 'push' && github.ref_name == 'main'
working-directory: /transformers
run: | run: |
git config --global --add safe.directory /__w/transformers/transformers python3 -m pip install optimum-benchmark>=0.2.0
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
commit_id=$(echo "${{ github.event.pull_request.head.sha }}")
elif [ "$GITHUB_EVENT_NAME" = "push" ]; then
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs
# HF_HUB_VERBOSITY: debug
# TRANSFORMERS_VERBOSITY: debug
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
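
Note: the reworked benchmark workflow above swaps the scheduled optimum-benchmark upload for a Postgres-backed flow: it installs `libpq-dev` and `psql`, initialises the `metrics` database from `benchmark/init_db.sql`, and calls `benchmark/benchmarks_entrypoint.py` with the branch name, commit id and truncated commit message, with credentials supplied through the `PG*` environment variables. As a hedged sketch only (the table name and columns are hypothetical, and this is not the actual script), such an entrypoint could record a run like this:

```python
import os
import sys

import psycopg2  # assumed dependency; libpq reads PGHOST/PGUSER/PGPASSWORD from the environment

branch, commit_id, commit_msg = sys.argv[1], sys.argv[2], sys.argv[3]

conn = psycopg2.connect(dbname=os.environ.get("PGDATABASE", "metrics"))
with conn, conn.cursor() as cur:
    # "benchmark_runs" and its columns are purely illustrative.
    cur.execute(
        "INSERT INTO benchmark_runs (branch, commit_id, commit_message) VALUES (%s, %s, %s)",
        (branch, commit_id, commit_msg),
    )
conn.close()
```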


@ -27,10 +27,10 @@ jobs:
strategy: strategy:
matrix: matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"] file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true continue-on-error: true
steps: steps:
- -
name: Set tag name: Set tag
run: | run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
@ -61,17 +61,4 @@ jobs:
REF=${{ github.sha }} REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile" file: "./docker/${{ matrix.file }}.dockerfile"
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }} push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }} tags: ${{ env.TAG }}
notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}


@ -20,8 +20,7 @@ concurrency:
jobs: jobs:
latest-docker: latest-docker:
name: "Latest PyTorch + TensorFlow [dev]" name: "Latest PyTorch + TensorFlow [dev]"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -69,8 +68,7 @@ jobs:
latest-torch-deepspeed-docker: latest-torch-deepspeed-docker:
name: "Latest PyTorch + DeepSpeed" name: "Latest PyTorch + DeepSpeed"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -106,8 +104,7 @@ jobs:
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
latest-torch-deepspeed-docker-for-push-ci-daily-build: latest-torch-deepspeed-docker-for-push-ci-daily-build:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -148,8 +145,7 @@ jobs:
name: "Doc builder" name: "Doc builder"
# Push CI doesn't need this image # Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci' if: inputs.image_postfix != '-push-ci'
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -184,8 +180,7 @@ jobs:
name: "Latest PyTorch [dev]" name: "Latest PyTorch [dev]"
# Push CI doesn't need this image # Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci' if: inputs.image_postfix != '-push-ci'
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -220,8 +215,7 @@ jobs:
latest-pytorch-amd: latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]" name: "Latest PyTorch (AMD) [dev]"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -271,8 +265,7 @@ jobs:
name: "Latest TensorFlow [dev]" name: "Latest TensorFlow [dev]"
# Push CI doesn't need this image # Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci' if: inputs.image_postfix != '-push-ci'
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -307,8 +300,7 @@ jobs:
latest-pytorch-deepspeed-amd: latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]" name: "PyTorch + DeepSpeed (AMD) [dev]"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -358,8 +350,7 @@ jobs:
name: "Latest Pytorch + Quantization [dev]" name: "Latest Pytorch + Quantization [dev]"
# Push CI doesn't need this image # Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci' if: inputs.image_postfix != '-push-ci'
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx


@ -13,9 +13,18 @@ concurrency:
jobs: jobs:
latest-with-torch-nightly-docker: latest-with-torch-nightly-docker:
name: "Nightly PyTorch + Stable TensorFlow" name: "Nightly PyTorch + Stable TensorFlow"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
- -
name: Set up Docker Buildx name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2 uses: docker/setup-buildx-action@v2
@ -41,9 +50,18 @@ jobs:
nightly-torch-deepspeed-docker: nightly-torch-deepspeed-docker:
name: "Nightly PyTorch + DeepSpeed" name: "Nightly PyTorch + DeepSpeed"
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
- -
name: Set up Docker Buildx name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2 uses: docker/setup-buildx-action@v2
@ -64,4 +82,4 @@ jobs:
build-args: | build-args: |
REF=main REF=main
push: true push: true
tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu


@ -16,8 +16,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
version: ["1.13", "1.12", "1.11"] version: ["1.13", "1.12", "1.11"]
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx
@ -61,8 +60,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"] version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
runs-on: runs-on: [intel-cpu, 8-cpu, ci]
group: aws-general-8-plus
steps: steps:
- -
name: Set up Docker Buildx name: Set up Docker Buildx


@ -1,7 +1,6 @@
name: Build documentation name: Build documentation
on: on:
workflow_dispatch:
push: push:
branches: branches:
- main - main
@ -16,7 +15,7 @@ jobs:
commit_sha: ${{ github.sha }} commit_sha: ${{ github.sha }}
package: transformers package: transformers
notebook_folder: transformers_doc notebook_folder: transformers_doc
languages: ar de en es fr hi it ko pt tr zh ja te languages: de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder custom_container: huggingface/transformers-doc-builder
secrets: secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }} token: ${{ secrets.HUGGINGFACE_PUSH }}
@ -14,5 +14,5 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }} commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }} pr_number: ${{ github.event.number }}
package: transformers package: transformers
languages: ar de en es fr hi it ko pt tr zh ja te languages: de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder custom_container: huggingface/transformers-doc-builder
@ -1,129 +0,0 @@
name: Process failed tests
on:
workflow_call:
inputs:
docker:
required: true
type: string
start_sha:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
run_models_gpu:
name: " "
runs-on:
group: aws-g4dn-2xlarge-cache
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- uses: actions/download-artifact@v4
with:
name: ci_results_run_models_gpu
path: /transformers/ci_results_run_models_gpu
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Get target commit
working-directory: /transformers/utils
run: |
echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
- name: Checkout to `start_sha`
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.start_sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Check failed tests
working-directory: /transformers
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
- name: Show results
working-directory: /transformers
run: |
ls -l new_model_failures_with_bad_commit.json
cat new_model_failures_with_bad_commit.json
- name: Checkout back
working-directory: /transformers
run: |
git checkout ${{ inputs.start_sha }}
- name: Process report
shell: bash
working-directory: /transformers
env:
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
run: |
python3 utils/process_bad_commit_report.py
- name: Process report
shell: bash
working-directory: /transformers
env:
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
run: |
{
echo 'REPORT_TEXT<<EOF'
python3 utils/process_bad_commit_report.py
echo EOF
} >> "$GITHUB_ENV"
- name: Send processed report
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
channel-id: '#transformers-ci-feedback-tests'
# For posting a rich message using Block Kit
payload: |
{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "${{ env.REPORT_TEXT }}"
}
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
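The "Process failed tests" workflow above walks the commit range between `start_sha` and the last daily-CI commit (`END_SHA`) with `utils/check_bad_commit.py` to find which commit introduced each new failure, then posts the processed report to Slack. A much-simplified sketch of that idea, assuming a `run_test(commit, test_id)` helper and a plain linear scan (the real script may search differently):

    import subprocess

    def run_test(commit, test_id):
        # Check out `commit` and return True if the test passes (illustrative only).
        subprocess.run(["git", "checkout", "-q", commit], check=True)
        result = subprocess.run(["python3", "-m", "pytest", "-x", test_id])
        return result.returncode == 0

    def first_bad_commit(start_sha, end_sha, test_id):
        # Commits from old (`end_sha`, last green daily CI) to new (`start_sha`), oldest first.
        log = subprocess.run(
            ["git", "rev-list", "--reverse", f"{end_sha}..{start_sha}"],
            capture_output=True, text=True, check=True,
        )
        for commit in log.stdout.split():
            if not run_test(commit, test_id):
                return commit  # first commit at which the test starts failing
        return None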
@ -23,7 +23,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python 3.8 - name: Set up Python 3.8
uses: actions/setup-python@v5 uses: actions/setup-python@v4
with: with:
# Semantic version range syntax or exact version of a Python version # Semantic version range syntax or exact version of a Python version
python-version: '3.8' python-version: '3.8'
@ -27,8 +27,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
split_keys: ${{ fromJson(inputs.split_keys) }} split_keys: ${{ fromJson(inputs.split_keys) }}
runs-on: runs-on: [single-gpu, nvidia-gpu, t4, ci]
group: aws-g4dn-2xlarge-cache
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
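The matrix above fans out over `split_keys`, i.e. an upstream job serializes a list of work chunks as JSON and each matrix job handles one key via `fromJson`. A small sketch of how such a split could be produced (the file list and slice count are hypothetical; only the JSON-for-`fromJson` convention is taken from the workflow):

    import json

    def split_into_slices(items, num_slices):
        # Distribute `items` across `num_slices` roughly equal chunks.
        slices = [[] for _ in range(num_slices)]
        for i, item in enumerate(items):
            slices[i % num_slices].append(item)
        return slices

    if __name__ == "__main__":
        files = [f"docs/source/en/page_{i}.md" for i in range(10)]  # hypothetical targets
        print(json.dumps(split_into_slices(files, 3)))  # consumed via fromJson(...)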
@ -14,8 +14,7 @@ env:
jobs: jobs:
setup: setup:
name: Setup name: Setup
runs-on: runs-on: [single-gpu, nvidia-gpu, t4, ci]
group: aws-g4dn-2xlarge-cache
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -86,4 +85,4 @@ jobs:
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: doc_test_results name: doc_test_results
path: doc_test_results path: doc_test_results
@ -12,12 +12,6 @@ on:
slice_id: slice_id:
required: true required: true
type: number type: number
runner:
required: true
type: string
docker:
required: true
type: string
env: env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
@ -37,14 +31,12 @@ jobs:
run_models_gpu: run_models_gpu:
name: " " name: " "
strategy: strategy:
max-parallel: 8
fail-fast: false fail-fast: false
matrix: matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on: runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ inputs.machine_type }}'
container: container:
image: ${{ inputs.docker }} image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: Echo input and matrix info - name: Echo input and matrix info
@ -73,18 +65,6 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') }}
working-directory: /transformers
run: |
python3 -m pip install -U datasets
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: NVIDIA-SMI - name: NVIDIA-SMI
run: | run: |
nvidia-smi nvidia-smi
@ -98,42 +78,25 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ inputs.machine_type }}"
if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ inputs.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all tests on GPU - name: Run all tests on GPU
working-directory: /transformers working-directory: /transformers
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Run test - name: Run test
shell: bash shell: bash
run: | run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
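On the side of this hunk that selects runners through `runs-on: group:`, a bash step maps the raw runner-group name back to the `single-gpu` / `multi-gpu` prefixes used in report and artifact names. A Python rendering of that same mapping (group names copied from the step; the helper itself is illustrative):

    # Map an AWS runner-group name to the prefix used in report/artifact names.
    GROUP_TO_PREFIX = {
        "aws-g4dn-2xlarge-cache": "single-gpu",
        "aws-g4dn-12xlarge-cache": "multi-gpu",
    }

    def machine_type(group):
        # Fall back to the raw group name, as the bash step does in its `else` branch.
        return GROUP_TO_PREFIX.get(group, group)

    def report_name(group, folder):
        # e.g. "single-gpu_run_models_gpu_models/bert_test_reports"
        return f"{machine_type(group)}_run_models_gpu_{folder}_test_reports"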
@ -1,129 +0,0 @@
name: model jobs
on:
workflow_call:
inputs:
folder_slices:
required: true
type: string
machine_type:
required: true
type: string
slice_id:
required: true
type: number
runner:
required: true
type: string
docker:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
run_models_gpu:
name: " "
strategy:
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
fail-fast: false
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ inputs.folder_slices }}"
echo "${{ matrix.folders }}"
echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') }}
working-directory: /transformers
run: |
python3 -m pip install -U datasets
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Run test
shell: bash
run: |
mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
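The "Echo folder" steps in these model-job workflows turn a test folder such as `models/bert` into `models_bert` before it is used in an artifact name, because `/` is not allowed there. A one-line Python rendering of that bash substitution:

    def artifact_folder(matrix_folder):
        # Mirrors `matrix_folders=${matrix_folders/'models/'/'models_'}` from the workflow:
        # only the leading "models/" prefix needs rewriting, since slashes are not
        # allowed in GitHub artifact names.
        return matrix_folder.replace("models/", "models_", 1)

    assert artifact_folder("models/bert") == "models_bert"
    assert artifact_folder("generation") == "generation"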
@ -52,8 +52,7 @@ jobs:
test_modified_files: test_modified_files:
needs: get_modified_models needs: get_modified_models
name: Slow & FA2 tests name: Slow & FA2 tests
runs-on: runs-on: [single-gpu, nvidia-gpu, a10, ci]
group: aws-g5-4xlarge-cache
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -134,3 +133,10 @@ jobs:
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true waitForSSH: true
benchmark:
name: Benchmark workflow
needs: get_modified_models
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
uses: ./.github/workflows/benchmark.yml
secrets: inherit
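The added `benchmark` job above only runs when `get_modified_models` produced a non-empty, non-trivial matrix. Its three-part `if:` condition can be read as a small predicate (sketch; `matrix` stands for the JSON string the upstream job writes to its outputs):

    import json

    def should_run_benchmark(matrix):
        # matrix != '[]' and matrix != '' and fromJson(matrix)[0] != null
        if matrix in ("", "[]"):
            return False
        parsed = json.loads(matrix)
        return bool(parsed) and parsed[0] is not None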
@ -19,7 +19,7 @@ jobs:
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v1
- name: Install miniconda - name: Install miniconda
uses: conda-incubator/setup-miniconda@v2 uses: conda-incubator/setup-miniconda@v2
@ -1,417 +0,0 @@
name: PR comment GitHub CI
on:
issue_comment:
types:
- created
branches-ignore:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
cancel-in-progress: true
permissions: read-all
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
get-pr-number:
runs-on: ubuntu-22.04
name: Get PR number
# For security: only allow team members to run
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:
- name: Get PR number
shell: bash
run: |
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
else
echo "PR_NUMBER=" >> $GITHUB_ENV
fi
- name: Check PR number
shell: bash
run: |
echo "${{ env.PR_NUMBER }}"
- name: Set PR number
id: set_pr_number
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
get-sha:
runs-on: ubuntu-22.04
needs: get-pr-number
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
outputs:
PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
- name: Get SHA (and verify timestamps against the issue comment date)
id: get_sha
env:
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
COMMENT_DATE: ${{ github.event.comment.created_at }}
run: |
git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
git checkout refs/remotes/pull/$PR_NUMBER/head
echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
git checkout refs/remotes/pull/$PR_NUMBER/merge
echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
echo "COMMENT_DATE: $COMMENT_DATE"
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
exit -1;
fi
# use a python script to handle this complex logic
# case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
# case 2: `run-slow model_1, model_2`
get-tests:
runs-on: ubuntu-22.04
needs: [get-pr-number, get-sha]
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
outputs:
models: ${{ steps.models_to_run.outputs.models }}
quantizations: ${{ steps.models_to_run.outputs.quantizations }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Get models to test
env:
PR_COMMENT: ${{ github.event.comment.body }}
run: |
python -m pip install GitPython
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
- name: Show models to test
id: models_to_run
run: |
echo "${{ env.models }}"
echo "models=${{ env.models }}" >> $GITHUB_ENV
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
echo "${{ env.quantizations }}"
echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
reply_to_comment:
name: Reply to the comment
if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-pr-number, get-tests]
permissions:
pull-requests: write
runs-on: ubuntu-22.04
steps:
- name: Reply to the comment
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MODELS: ${{ needs.get-tests.outputs.models }}
BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
create_run:
name: Create run
if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-sha, get-tests, reply_to_comment]
permissions:
statuses: write
runs-on: ubuntu-22.04
steps:
- name: Create Run
id: create_run
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
# See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
run_models_gpu:
name: Run all tests for the model
if: ${{ needs.get-tests.outputs.models != '[]' }}
needs: [get-pr-number, get-sha, get-tests, create_run]
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.get-tests.outputs.models) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ matrix.folders }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Checkout to PR merge commit
working-directory: /transformers
run: |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git log -1 --format=%H
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
working-directory: /transformers
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
run_quantization_torch_gpu:
name: Run all tests for a quantization
if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-pr-number, get-sha, get-tests, create_run]
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Checkout to PR merge commit
working-directory: /transformers
run: |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git log -1 --format=%H
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
working-directory: /transformers
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
update_run_status:
name: Update Check Run Status
needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
permissions:
statuses: write
if: ${{ always() && needs.create_run.result == 'success' }}
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
steps:
- name: Get `run_models_gpu` job status
run: |
echo "${{ needs.run_models_gpu.result }}"
echo "${{ needs.run_quantization_torch_gpu.result }}"
echo $STATUS_OK
if [ "$STATUS_OK" = "true" ]; then
echo "STATUS=success" >> $GITHUB_ENV
else
echo "STATUS=failure" >> $GITHUB_ENV
fi
- name: Update PR commit statuses
run: |
echo "${{ needs.run_models_gpu.result }}"
echo "${{ env.STATUS }}"
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"
@ -1,43 +0,0 @@
name: Self-hosted runner (nightly-ci)
on:
repository_dispatch:
schedule:
- cron: "17 2 * * *"
push:
branches:
- run_nightly_ci*
jobs:
build_nightly_ci_images:
name: Build Nightly CI Docker Images
if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
uses: ./.github/workflows/build-nightly-ci-docker-images.yml
secrets: inherit
model-ci:
name: Model CI
needs: [build_nightly_ci_images]
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: ci
docker: huggingface/transformers-all-latest-torch-nightly-gpu
ci_event: Nightly CI
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
needs: [build_nightly_ci_images]
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: ci
# test deepspeed nightly build with the latest release torch
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Nightly CI
working-directory-prefix: /workspace
secrets: inherit
@ -2,30 +2,65 @@ name: Self-hosted runner (nightly-past-ci-caller)
on: on:
schedule: schedule:
- cron: "17 2,14 * * *" # 2:17 am on each Sunday and Thursday
- cron: "17 2 * * 0,4"
push: push:
branches: branches:
- run_nightly_ci*
- run_past_ci* - run_past_ci*
jobs: jobs:
get_number: build_nightly_ci_images:
name: Get number name: Build Nightly CI Docker Images
runs-on: ubuntu-22.04 if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
outputs: uses: ./.github/workflows/build-nightly-ci-docker-images.yml
run_number: ${{ steps.get_number.outputs.run_number }} secrets: inherit
steps:
- name: Get number run_nightly_ci:
id: get_number name: Nightly CI
run: | needs: [build_nightly_ci_images]
echo "${{ github.run_number }}" uses: ./.github/workflows/self-nightly-scheduled.yml
echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" secrets: inherit
echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
run_past_ci_pytorch_1-13:
name: PyTorch 1.13
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
needs: [run_nightly_ci]
uses: ./.github/workflows/self-past.yml
with:
framework: pytorch
version: "1.13"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_pytorch_1-12:
name: PyTorch 1.12
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
needs: [run_past_ci_pytorch_1-13]
uses: ./.github/workflows/self-past.yml
with:
framework: pytorch
version: "1.12"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_pytorch_1-11:
name: PyTorch 1.11
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
needs: [run_past_ci_pytorch_1-12]
uses: ./.github/workflows/self-past.yml
with:
framework: pytorch
version: "1.11"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_tensorflow_2-11: run_past_ci_tensorflow_2-11:
name: TensorFlow 2.11 name: TensorFlow 2.11
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 3 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_pytorch_1-11]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.11" version: "2.11"
@ -34,9 +69,9 @@ jobs:
run_past_ci_tensorflow_2-10: run_past_ci_tensorflow_2-10:
name: TensorFlow 2.10 name: TensorFlow 2.10
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 4 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-11]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.10" version: "2.10"
@ -45,9 +80,9 @@ jobs:
run_past_ci_tensorflow_2-9: run_past_ci_tensorflow_2-9:
name: TensorFlow 2.9 name: TensorFlow 2.9
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 5 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-10]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.9" version: "2.9"
@ -56,9 +91,9 @@ jobs:
run_past_ci_tensorflow_2-8: run_past_ci_tensorflow_2-8:
name: TensorFlow 2.8 name: TensorFlow 2.8
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 6 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-9]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.8" version: "2.8"
@ -67,9 +102,9 @@ jobs:
run_past_ci_tensorflow_2-7: run_past_ci_tensorflow_2-7:
name: TensorFlow 2.7 name: TensorFlow 2.7
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 7 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-8]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.7" version: "2.7"
@ -78,9 +113,9 @@ jobs:
run_past_ci_tensorflow_2-6: run_past_ci_tensorflow_2-6:
name: TensorFlow 2.6 name: TensorFlow 2.6
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 8 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-7]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.6" version: "2.6"
@ -89,9 +124,9 @@ jobs:
run_past_ci_tensorflow_2-5: run_past_ci_tensorflow_2-5:
name: TensorFlow 2.5 name: TensorFlow 2.5
needs: get_number if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
if: needs.get_number.outputs.run_number == 9 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) needs: [run_past_ci_tensorflow_2-6]
uses: ./.github/workflows/self-past-caller.yml uses: ./.github/workflows/self-past.yml
with: with:
framework: tensorflow framework: tensorflow
version: "2.5" version: "2.5"
@ -0,0 +1,290 @@
name: Self-hosted runner (nightly-ci)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
repository_dispatch:
workflow_call:
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
setup:
name: Setup
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-all-latest-torch-nightly-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- id: set-matrix
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
- name: NVIDIA-SMI
run: |
nvidia-smi
run_tests_single_gpu:
name: Model tests
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-all-latest-torch-nightly-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_multi_gpu:
name: Model tests
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-all-latest-torch-nightly-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_torch_cuda_extensions_gpu:
name: Torch CUDA extension tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
needs: setup
container:
image: huggingface/transformers-pytorch-deepspeed-nightly-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /workspace/transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures
- name: Pre build DeepSpeed *again*
working-directory: /workspace
run: |
python3 -m pip uninstall -y deepspeed
rm -rf DeepSpeed
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /workspace/transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /workspace/transformers
run: |
python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly
path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
setup,
run_tests_single_gpu,
run_tests_multi_gpu,
run_torch_cuda_extensions_gpu
]
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Nightly CI
SETUP_STATUS: ${{ needs.setup.result }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
# delete-artifact
- uses: geekyeggo/delete-artifact@v2
with:
name: |
single-*
multi-*
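The "Identify models to test" step in the setup job above packs the test-folder discovery into one `python3 -c` line. Unfolded into ordinary Python it reads roughly as follows (same logic: one entry per `tests/models/<name>` folder plus every other top-level test directory, with the bare `models` umbrella removed):

    import os

    def test_folders(tests_dir):
        # Runs from the `tests/` directory of the repo, like the one-liner in the workflow.
        os.chdir(tests_dir)
        model_tests = os.listdir("models")
        # Top-level test directories (e.g. "generation"), minus the "models" umbrella...
        d1 = sorted(d for d in os.listdir(".") if os.path.isdir(d))
        d1.remove("models")
        # ...plus one entry per model folder, e.g. "models/bert".
        d2 = sorted(f"models/{x}" for x in model_tests if os.path.isdir(f"models/{x}"))
        return d2 + d1

    if __name__ == "__main__":
        print(test_folders("tests"))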
@ -1,40 +0,0 @@
name: Self-hosted runner (past-ci)
on:
workflow_call:
inputs:
framework:
required: true
type: string
version:
required: true
type: string
# Use this to control the commit to test against
sha:
default: 'main'
required: false
type: string
jobs:
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: past-ci
docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-past-future"
runner: past-ci
docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
secrets: inherit
.github/workflows/self-past.yml (new file, 357 lines)
@ -0,0 +1,357 @@
name: Self-hosted runner (past-ci)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
workflow_call:
inputs:
framework:
required: true
type: string
version:
required: true
type: string
# Use this to control the commit to test against
sha:
default: 'main'
required: false
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
setup:
name: Setup
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.sha }}
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- id: set-matrix
working-directory: /transformers
name: Identify models to test
run: |
cd tests
echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
run_tests_single_gpu:
name: Model tests
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update some packages
working-directory: /transformers
run: python3 -m pip install -U datasets
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Install
if: inputs.framework == 'pytorch'
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: Save job name
if: ${{ always() }}
shell: bash
run: |
matrix_folders=${matrix_folders/'models_'/'models/'}
job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
echo "$job_name"
echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_multi_gpu:
name: Model tests
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
container:
image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update some packages
working-directory: /transformers
run: python3 -m pip install -U datasets
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Install
if: inputs.framework == 'pytorch'
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: Save job name
if: ${{ always() }}
shell: bash
run: |
matrix_folders=${matrix_folders/'models_'/'models/'}
job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
echo "$job_name"
echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_torch_cuda_extensions_gpu:
name: Torch CUDA extension tests
if: inputs.framework == 'pytorch'
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci]
needs: setup
container:
image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update some packages
working-directory: /transformers
run: python3 -m pip install -U datasets
- name: Install
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# Rebuild DeepSpeed from source to avoid otherwise hard-to-diagnose test failures
- name: Pre-build DeepSpeed *again*
working-directory: /
run: |
python3 -m pip uninstall -y deepspeed
rm -rf DeepSpeed
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
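# DS_BUILD_CPU_ADAM / DS_BUILD_FUSED_ADAM pre-compile the CPU Adam and fused Adam ops at install
# time instead of JIT-compiling them during the tests; --global-option="-j8" parallelizes the extension build.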
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
setup,
run_tests_single_gpu,
run_tests_multi_gpu,
run_torch_cuda_extensions_gpu
]
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
# Create a directory to store test failure tables in the next step
- name: Create directory
run: mkdir test_failure_tables
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
SETUP_STATUS: ${{ needs.setup.result }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
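# A minimal sketch of the folder-name processing described above (illustrative only, not the actual
# implementation in `notification_service.py`): artifact names use `_` where the matrix uses `/`, so
# the matrix entries have to be rewritten before looking up the downloaded artifacts, e.g.
#
#   import json
#   matrix = json.loads('["models/bert", "models/gpt2", "pipelines"]')  # hypothetical matrix value
#   artifact_folders = [m.replace("models/", "models_", 1) for m in matrix]
#   # -> ["models_bert", "models_gpt2", "pipelines"]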
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
path: test_failure_tables
# delete-artifact
- uses: geekyeggo/delete-artifact@v2
with:
name: |
single-*
multi-*

.github/workflows/self-pr-slow-ci.yml vendored Normal file

@ -0,0 +1,135 @@
name: PR slow CI
on:
pull_request:
paths:
- "src/transformers/models/*/modeling_*.py"
- "tests/models/*/test_*.py"
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
find_models_to_run:
runs-on: ubuntu-22.04
name: Find models to run slow tests
# Triggered only if the required label `run-slow` is added
if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
outputs:
models: ${{ steps.models_to_run.outputs.models }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: ${{ github.event.pull_request.head.sha }}
- name: Get commit message
run: |
echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV
- name: Get models to run slow tests
run: |
echo "${{ env.commit_message }}"
python -m pip install GitPython
python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" | tee output.txt
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
- name: Models to run slow tests
id: models_to_run
run: |
echo "${{ env.models }}"
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
run_models_gpu:
name: Run all tests for the model
# Runs only if `find_models_to_run` was triggered (i.e. the label `run-slow` was added), which gives the models to run
# (either a new model PR or via a commit message)
if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
needs: find_models_to_run
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ matrix.folders }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
python3 -m pytest -v -rsfE --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
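# Presumably the step below exists so that the artifact upload at the end never fails on a missing
# path: it creates the report directory with a placeholder file even when pytest produced no reports.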
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports


@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi210 CI caller) name: Self-hosted runner (AMD mi210 CI caller)
on: on:
#workflow_run: workflow_run:
# workflows: ["Self-hosted runner (push-caller)"] workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"] branches: ["main"]
# types: [completed] types: [completed]
push: push:
branches: branches:
- run_amd_push_ci_caller* - run_amd_push_ci_caller*
paths: paths:
- "src/**" - "src/**"
- "tests/**" - "tests/**"
- ".github/**" - ".github/**"
- "templates/**" - "templates/**"
- "utils/**" - "utils/**"
jobs: jobs:
run_amd_ci: run_amd_ci:
name: AMD mi210 name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml uses: ./.github/workflows/self-push-amd.yml
with: with:
gpu_flavor: mi210 gpu_flavor: mi210
secrets: inherit secrets: inherit


@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi250 CI caller) name: Self-hosted runner (AMD mi250 CI caller)
on: on:
#workflow_run: workflow_run:
# workflows: ["Self-hosted runner (push-caller)"] workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"] branches: ["main"]
# types: [completed] types: [completed]
push: push:
branches: branches:
- run_amd_push_ci_caller* - run_amd_push_ci_caller*
paths: paths:
- "src/**" - "src/**"
- "tests/**" - "tests/**"
- ".github/**" - ".github/**"
- "templates/**" - "templates/**"
- "utils/**" - "utils/**"
jobs: jobs:
run_amd_ci: run_amd_ci:
name: AMD mi250 name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml uses: ./.github/workflows/self-push-amd.yml
with: with:
gpu_flavor: mi250 gpu_flavor: mi250
secrets: inherit secrets: inherit


@ -1,10 +1,10 @@
name: Self-hosted runner (AMD mi300 CI caller) name: Self-hosted runner (AMD mi300 CI caller)
on: on:
#workflow_run: workflow_run:
# workflows: ["Self-hosted runner (push-caller)"] workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"] branches: ["main"]
# types: [completed] types: [completed]
push: push:
branches: branches:
- run_amd_push_ci_caller* - run_amd_push_ci_caller*


@ -64,24 +64,23 @@ jobs:
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }} test_map: ${{ steps.set-matrix.outputs.test_map }}
env:
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables - name: Prepare custom environment variables
shell: bash shell: bash
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -160,12 +159,6 @@ jobs:
container: container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
@ -173,7 +166,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -259,12 +256,6 @@ jobs:
# run_tests_torch_cuda_extensions_single_gpu, # run_tests_torch_cuda_extensions_single_gpu,
# run_tests_torch_cuda_extensions_multi_gpu # run_tests_torch_cuda_extensions_multi_gpu
] ]
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
- name: Preliminary job status - name: Preliminary job status
shell: bash shell: bash
@ -280,7 +271,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -329,7 +324,6 @@ jobs:
# We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: | run: |
pip install huggingface_hub
pip install slack_sdk pip install slack_sdk
pip show slack_sdk pip show slack_sdk
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"


@ -32,33 +32,31 @@ jobs:
name: Setup name: Setup
strategy: strategy:
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu-push-ci image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }} test_map: ${{ steps.set-matrix.outputs.test_map }}
env:
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables - name: Prepare custom environment variables
shell: bash shell: bash
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -132,18 +130,11 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }} folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g4dn-2xlarge-cache] machine_type: [single-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu-push-ci image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
@ -151,7 +142,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -164,23 +159,6 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}" echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables - name: Update clone using environment variables
working-directory: /transformers working-directory: /transformers
run: | run: |
@ -222,19 +200,19 @@ jobs:
- name: Run all non-slow selected tests on GPU - name: Run all non-slow selected tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }} python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_multi_gpu: run_tests_multi_gpu:
name: Model tests name: Model tests
@ -245,18 +223,11 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }} folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g4dn-12xlarge-cache] machine_type: [multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu-push-ci image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
@ -264,7 +235,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -277,23 +252,6 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}" echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables - name: Update clone using environment variables
working-directory: /transformers working-directory: /transformers
run: | run: |
@ -337,19 +295,19 @@ jobs:
MKL_SERVICE_FORCE_INTEL: 1 MKL_SERVICE_FORCE_INTEL: 1
working-directory: /transformers working-directory: /transformers
run: | run: |
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }} python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_torch_cuda_extensions_single_gpu: run_tests_torch_cuda_extensions_single_gpu:
name: Torch CUDA extension tests name: Torch CUDA extension tests
@ -358,18 +316,11 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache] machine_type: [single-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
@ -377,7 +328,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -390,23 +345,6 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}" echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /workspace/transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables - name: Update clone using environment variables
working-directory: /workspace/transformers working-directory: /workspace/transformers
run: | run: |
@ -447,19 +385,19 @@ jobs:
working-directory: /workspace/transformers working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: | run: |
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_tests_torch_cuda_extensions_multi_gpu: run_tests_torch_cuda_extensions_multi_gpu:
name: Torch CUDA extension tests name: Torch CUDA extension tests
@ -468,18 +406,11 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-12xlarge-cache] machine_type: [multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event # Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch) # We also take into account the `push` event (we might want to test some changes in a branch)
@ -487,7 +418,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -500,23 +435,6 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}" echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /workspace/transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables - name: Update clone using environment variables
working-directory: /workspace/transformers working-directory: /workspace/transformers
run: | run: |
@ -557,19 +475,19 @@ jobs:
working-directory: /workspace/transformers working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: | run: |
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results: send_results:
name: Send results to webhook name: Send results to webhook
@ -582,12 +500,6 @@ jobs:
run_tests_torch_cuda_extensions_single_gpu, run_tests_torch_cuda_extensions_single_gpu,
run_tests_torch_cuda_extensions_multi_gpu run_tests_torch_cuda_extensions_multi_gpu
] ]
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps: steps:
- name: Preliminary job status - name: Preliminary job status
shell: bash shell: bash
@ -601,7 +513,11 @@ jobs:
shell: bash shell: bash
# For the meaning of these environment variables, see the job `Setup` # For the meaning of these environment variables, see the job `Setup`
run: | run: |
CI_BRANCH_PUSH=${{ github.event.ref }}
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH=${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }}
echo $CI_BRANCH_PUSH echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH echo $CI_SHA_PUSH
@ -647,7 +563,6 @@ jobs:
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: | run: |
pip install huggingface_hub pip install slack_sdk
pip install slack_sdk
pip show slack_sdk pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"


@ -1,55 +1,20 @@
name: Self-hosted runner (AMD mi210 scheduled CI caller) name: Self-hosted runner (AMD mi210 scheduled CI caller)
on: on:
workflow_run: workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"] workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"] branches: ["main"]
types: [completed] types: [completed]
push: push:
branches: branches:
- run_amd_scheduled_ci_caller* - run_amd_scheduled_ci_caller*
jobs: jobs:
model-ci: run_amd_ci:
name: Model CI name: AMD mi210
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
with: uses: ./.github/workflows/self-scheduled-amd.yml
job: run_models_gpu with:
slack_report_channel: "#transformers-ci-daily-amd" gpu_flavor: mi210
runner: mi210 slack_report_channel: "#transformers-ci-daily-amd"
docker: huggingface/transformers-pytorch-amd-gpu secrets: inherit
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit


@ -1,55 +1,20 @@
name: Self-hosted runner (AMD mi250 scheduled CI caller) name: Self-hosted runner (AMD mi250 scheduled CI caller)
on: on:
workflow_run: workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"] workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"] branches: ["main"]
types: [completed] types: [completed]
push: push:
branches: branches:
- run_amd_scheduled_ci_caller* - run_amd_scheduled_ci_caller*
jobs: jobs:
model-ci: run_amd_ci:
name: Model CI name: AMD mi250
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
with: uses: ./.github/workflows/self-scheduled-amd.yml
job: run_models_gpu with:
slack_report_channel: "#transformers-ci-daily-amd" gpu_flavor: mi250
runner: mi250 slack_report_channel: "#transformers-ci-daily-amd"
docker: huggingface/transformers-pytorch-amd-gpu secrets: inherit
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit


@ -0,0 +1,21 @@
name: Self-hosted runner (AMD mi300 scheduled CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
run_amd_ci:
name: AMD mi300
needs: build-docker-containers
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
uses: ./.github/workflows/self-scheduled-amd.yml
with:
gpu_flavor: mi300
slack_report_channel: "#transformers-ci-daily-amd"
secrets: inherit

.github/workflows/self-scheduled-amd.yml vendored Normal file

@ -0,0 +1,519 @@
name: Self-hosted runner (scheduled-amd)
# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger this
# CI on both MI210 and MI250, without having to use a matrix here, which would push us towards the
# limit of allowed jobs on GitHub Actions.
on:
workflow_call:
inputs:
gpu_flavor:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
# Important note: each job (run_models_gpu_single_gpu, run_models_gpu_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs to finish before running.
# This is done to avoid parallelizing the scheduled tests and to leave runners available
# for the push CI that is running on the same machine.
jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-22.04
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
setup:
name: Setup
needs: check_runners
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- id: set-matrix
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
run_models_gpu_single_gpu:
name: Single GPU tests
strategy:
max-parallel: 1 # For now, do not parallelize. Can change later if it works well.
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [single-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
run_models_gpu_multi_gpu:
name: Multi GPU tests
strategy:
max-parallel: 1
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
run_examples_gpu:
name: Examples tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
run_pipelines_torch_gpu:
name: PyTorch pipelines tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
run_torch_cuda_extensions_gpu:
name: Torch ROCm deepspeed tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
needs: setup
container:
image: huggingface/transformers-pytorch-deepspeed-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
check_runner_status,
check_runners,
setup,
run_models_gpu_single_gpu,
run_models_gpu_multi_gpu,
run_examples_gpu,
run_pipelines_torch_gpu,
run_torch_cuda_extensions_gpu
]
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Install transformers
run: pip install transformers
- name: Show installed libraries and their versions
run: pip freeze
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v4
with:
path: warnings_in_ci
- name: Show artifacts
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
working-directory: warnings_in_ci
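# Assumed flow: utils/extract_warnings.py collects warnings from this run's test-report artifacts and
# writes warnings_in_ci/selected_warnings.json, which is echoed below and uploaded as an artifact.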
- name: Extract warnings in CI artifacts
run: |
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
check_runner_status,
check_runners,
setup,
run_models_gpu_single_gpu,
run_models_gpu_multi_gpu,
run_examples_gpu,
run_pipelines_torch_gpu,
run_torch_cuda_extensions_gpu,
run_extract_warnings
]
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Runner availability: ${{ needs.check_runner_status.result }}"
echo "Runner status: ${{ needs.check_runners.result }}"
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID_DAILY_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }}
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
SETUP_STATUS: ${{ needs.setup.result }}
# We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` then has to convert
# `models/bert` to `models_bert`, because the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
# Upload the complete failure tables; they can be big, and only truncated versions can be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: test_failure_tables
path: test_failure_tables
View File
@@ -16,9 +16,6 @@ jobs:
     with:
       job: run_models_gpu
       slack_report_channel: "#transformers-ci-daily-models"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
     secrets: inherit

   torch-pipeline:
@@ -27,9 +24,6 @@ jobs:
     with:
       job: run_pipelines_torch_gpu
       slack_report_channel: "#transformers-ci-daily-pipeline-torch"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-gpu
-      ci_event: Daily CI
     secrets: inherit

   tf-pipeline:
@@ -38,9 +32,6 @@ jobs:
     with:
       job: run_pipelines_tf_gpu
       slack_report_channel: "#transformers-ci-daily-pipeline-tf"
-      runner: daily-ci
-      docker: huggingface/transformers-tensorflow-gpu
-      ci_event: Daily CI
     secrets: inherit

   example-ci:
@@ -49,9 +40,6 @@ jobs:
     with:
       job: run_examples_gpu
       slack_report_channel: "#transformers-ci-daily-examples"
-      runner: daily-ci
-      docker: huggingface/transformers-all-latest-gpu
-      ci_event: Daily CI
     secrets: inherit

   deepspeed-ci:
@@ -60,10 +48,6 @@ jobs:
     with:
       job: run_torch_cuda_extensions_gpu
       slack_report_channel: "#transformers-ci-daily-deepspeed"
-      runner: daily-ci
-      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
-      ci_event: Daily CI
-      working-directory-prefix: /workspace
     secrets: inherit

   quantization-ci:
@@ -72,7 +56,4 @@ jobs:
     with:
       job: run_quantization_torch_gpu
       slack_report_channel: "#transformers-ci-daily-quantization"
-      runner: daily-ci
-      docker: huggingface/transformers-quantization-latest-gpu
-      ci_event: Daily CI
     secrets: inherit
View File
@ -15,19 +15,6 @@ on:
slack_report_channel: slack_report_channel:
required: true required: true
type: string type: string
runner:
required: true
type: string
docker:
required: true
type: string
ci_event:
required: true
type: string
working-directory-prefix:
default: ''
required: false
type: string
env: env:
HF_HOME: /mnt/cache HF_HOME: /mnt/cache
@ -50,9 +37,8 @@ jobs:
name: Setup name: Setup
strategy: strategy:
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -84,7 +70,7 @@ jobs:
run: | run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- id: set-matrix-quantization - id: set-matrix-quantization
if: ${{ inputs.job == 'run_quantization_torch_gpu' }} if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
name: Identify quantization method to test name: Identify quantization method to test
@ -103,15 +89,13 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs.yml uses: ./.github/workflows/model_jobs.yml
with: with:
folder_slices: ${{ needs.setup.outputs.folder_slices }} folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }} machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }} slice_id: ${{ matrix.slice_id }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
secrets: inherit secrets: inherit
run_pipelines_torch_gpu: run_pipelines_torch_gpu:
@ -120,9 +104,8 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-pytorch-gpu image: huggingface/transformers-pytorch-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -148,39 +131,22 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all pipeline tests on GPU - name: Run all pipeline tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
run_pipelines_tf_gpu: run_pipelines_tf_gpu:
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }} if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
@ -188,9 +154,8 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-tensorflow-gpu image: huggingface/transformers-tensorflow-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -217,39 +182,22 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all pipeline tests on GPU - name: Run all pipeline tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
- name: Failure short reports - name: Failure short reports
if: ${{ always() }} if: ${{ always() }}
run: | run: |
cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
run_examples_gpu: run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }} if: ${{ inputs.job == 'run_examples_gpu' }}
@ -257,9 +205,8 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache] machine_type: [single-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -285,40 +232,23 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run examples tests on GPU - name: Run examples tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
pip install -r examples/pytorch/_tests_requirements.txt pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_examples_gpu_test_reports name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
run_torch_cuda_extensions_gpu: run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
@ -326,108 +256,70 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: ${{ inputs.docker }} image: huggingface/transformers-pytorch-deepspeed-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: Update clone - name: Update clone
working-directory: ${{ inputs.working-directory-prefix }}/transformers working-directory: /workspace/transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build) - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: ${{ inputs.working-directory-prefix }}/transformers working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
working-directory: ${{ inputs.working-directory-prefix }}/transformers
run: |
python3 -m pip install -U datasets
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: Remove cached torch extensions - name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/ run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures # To avoid unknown test failures
- name: Pre build DeepSpeed *again* (for daily CI) - name: Pre build DeepSpeed *again*
if: ${{ contains(inputs.ci_event, 'Daily CI') }} working-directory: /workspace
working-directory: ${{ inputs.working-directory-prefix }}/
run: | run: |
python3 -m pip uninstall -y deepspeed python3 -m pip uninstall -y deepspeed
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
# To avoid unknown test failures
- name: Pre build DeepSpeed *again* (for nightly & Past CI)
if: ${{ contains(inputs.ci_event, 'Nightly CI') || contains(inputs.ci_event, 'Past CI') }}
working-directory: ${{ inputs.working-directory-prefix }}/
run: |
python3 -m pip uninstall -y deepspeed
rm -rf DeepSpeed
git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI - name: NVIDIA-SMI
run: | run: |
nvidia-smi nvidia-smi
- name: Environment - name: Environment
working-directory: ${{ inputs.working-directory-prefix }}/transformers working-directory: /workspace/transformers
run: | run: |
python3 utils/print_env.py python utils/print_env.py
- name: Show installed libraries and their versions - name: Show installed libraries and their versions
working-directory: ${{ inputs.working-directory-prefix }}/transformers working-directory: /workspace/transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run all tests on GPU - name: Run all tests on GPU
working-directory: ${{ inputs.working-directory-prefix }}/transformers working-directory: /workspace/transformers
run: | run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_quantization_torch_gpu: run_quantization_torch_gpu:
if: ${{ inputs.job == 'run_quantization_torch_gpu' }} if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
name: " " name: " "
needs: setup needs: setup
strategy: strategy:
max-parallel: 4
fail-fast: false fail-fast: false
matrix: matrix:
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }} folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [single-gpu, multi-gpu]
runs-on: runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-quantization-latest-gpu image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -462,39 +354,22 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: pip freeze run: pip freeze
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Run quantization tests on GPU - name: Run quantization tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports - name: Failure short reports
if: ${{ failure() }} if: ${{ failure() }}
continue-on-error: true continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }} if: ${{ always() }}
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
run_extract_warnings: run_extract_warnings:
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
@ -559,16 +434,5 @@ jobs:
# This would be an empty string if `setup` is skipped. # This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }} folder_slices: ${{ needs.setup.outputs.folder_slices }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
ci_event: ${{ inputs.ci_event }}
secrets: inherit secrets: inherit
check_new_model_failures:
if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
name: Check new model failures
needs: send_results
uses: ./.github/workflows/check_failed_model_tests.yml
with:
docker: ${{ inputs.docker }}
start_sha: ${{ github.sha }}
secrets: inherit
View File
@@ -18,9 +18,6 @@ on:
       quantization_matrix:
         required: true
         type: string
-      ci_event:
-        required: true
-        type: string

 env:
   TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -48,7 +45,7 @@ jobs:
           CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
           SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: ${{ inputs.ci_event }}
+          CI_EVENT: scheduled
           CI_SHA: ${{ github.sha }}
           CI_WORKFLOW_REF: ${{ github.workflow_ref }}
           CI_TEST_JOB: ${{ inputs.job }}
@@ -70,7 +67,7 @@ jobs:
         with:
           name: ci_results_${{ inputs.job }}
           path: ci_results_${{ inputs.job }}
       - uses: actions/checkout@v4
       - uses: actions/download-artifact@v4
       - name: Send message to Slack for quantization workflow
@@ -79,7 +76,7 @@ jobs:
           CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
           SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
-          CI_EVENT: ${{ inputs.ci_event }}
+          CI_EVENT: scheduled
           CI_SHA: ${{ github.sha }}
           CI_TEST_JOB: ${{ inputs.job }}
           SETUP_STATUS: ${{ inputs.setup_status }}
@@ -90,7 +87,7 @@ jobs:
           pip install huggingface_hub
           pip install slack_sdk
           pip show slack_sdk
           python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"

       # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
       - name: Failure table artifacts
@@ -98,4 +95,4 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: ci_results_${{ inputs.job }}
           path: ci_results_${{ inputs.job }}
View File
@@ -26,38 +26,9 @@ env:
   RUN_PT_TF_CROSS_TESTS: 1

 jobs:
-  get_runner:
-    name: "Get runner to use"
-    runs-on: ubuntu-22.04
-    outputs:
-      RUNNER: ${{ steps.set_runner.outputs.RUNNER }}
-    steps:
-      - name: Get runner to use
-        shell: bash
-        run: |
-          if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
-            echo "RUNNER=aws-g4dn-2xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
-            echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
-            echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
-          elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
-            echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
-          else
-            echo "RUNNER=" >> $GITHUB_ENV
-          fi
-      - name: Set runner to use
-        id: set_runner
-        run: |
-          echo ${{ env.RUNNER }}
-          echo "RUNNER=${{ env.RUNNER }}" >> $GITHUB_OUTPUT
-
   ssh_runner:
     name: "SSH"
-    needs: get_runner
-    runs-on:
-      group: ${{ needs.get_runner.outputs.RUNNER }}
+    runs-on: ["${{ github.event.inputs.num_gpus }}-gpu", nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
     container:
       image: ${{ github.event.inputs.docker_image }}
       options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -82,33 +53,11 @@ jobs:
       - name: NVIDIA-SMI
         run: |
           nvidia-smi
-      - name: Store Slack infos
-        #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
-        shell: bash
-        run: |
-          echo "${{ github.actor }}"
-          github_actor=${{ github.actor }}
-          github_actor=${github_actor/'-'/'_'}
-          echo "$github_actor"
-          echo "github_actor=$github_actor" >> $GITHUB_ENV
-      - name: Store Slack infos
-        #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
-        shell: bash
-        run: |
-          echo "${{ env.github_actor }}"
-          if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then
-            echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
-          else
-            echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
-          fi
       - name: Tailscale # In order to be able to SSH when a test fails
         uses: huggingface/tailscale-action@main
         with:
           authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
-          slackChannel: ${{ env.SLACKCHANNEL }}
+          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
           slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
           waitForSSH: true
-          sshTimeout: 15m
View File
@@ -9,15 +9,13 @@ jobs:
     name: Close Stale Issues
     if: github.repository == 'huggingface/transformers'
     runs-on: ubuntu-22.04
-    permissions:
-      issues: write
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - uses: actions/checkout@v4

       - name: Setup Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
         with:
           python-version: 3.8
View File
@@ -10,11 +10,20 @@ jobs:
   trufflehog:
     runs-on: ubuntu-latest
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Secret Scanning
-        uses: trufflesecurity/trufflehog@main
-        with:
-          extra_args: --results=verified,unknown
+      - shell: bash
+        run: |
+          if [ "${{ github.event_name }}" == "push" ]; then
+            echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV
+            echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV
+          fi
+          if [ "${{ github.event_name }}" == "pull_request" ]; then
+            echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV
+            echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV
+          fi
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{env.branch}}
+          fetch-depth: ${{env.depth}}
+      - name: Secret Scanning
+        uses: trufflesecurity/trufflehog@main
View File
@@ -61,10 +61,7 @@ feedback.
 The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.
 Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions.
-
-> [!TIP]
-> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫
+already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) first. This helps us respond quicker to fixing issues related to the library versus general questions.

 Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:
@@ -132,7 +129,7 @@ You will need basic `git` proficiency to contribute to
 manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
 Git](https://git-scm.com/book/en/v2) is a very good reference.

-You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
+You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L426)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:

 1. Fork the [repository](https://github.com/huggingface/transformers) by
    clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
@@ -163,7 +160,7 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
    If 🤗 Transformers was already installed in the virtual environment, remove
    it with `pip uninstall transformers` before reinstalling it in editable
    mode with the `-e` flag.

    Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
    failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
    (PyTorch, TensorFlow and/or Flax) then do:
@@ -222,7 +219,7 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
    If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
    make sure you install the documentation builder:

    ```bash
    pip install ".[docs]"
    ```
@@ -341,12 +338,12 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_ne
 RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
 ```

-Like the slow tests, there are other environment variables available which are not enabled by default during testing:
+Like the slow tests, there are other environment variables available which not enabled by default during testing:

 - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
 - `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
 - `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

-More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
+More environment variables and additional information can be found in the [testing_utils.py](src/transformers/testing_utils.py).

 🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
 `pytest`-specific features in the test suite itself.
View File
@@ -36,7 +36,6 @@ autogenerate_code: deps_table_update
 repo-consistency:
 	python utils/check_copies.py
-	python utils/check_modular_conversion.py
 	python utils/check_table.py
 	python utils/check_dummies.py
 	python utils/check_repo.py
@@ -54,14 +53,15 @@ quality:
 	@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
 	ruff check $(check_dirs) setup.py conftest.py
 	ruff format --check $(check_dirs) setup.py conftest.py
+	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
 	python utils/check_doc_toc.py
-	python utils/check_docstrings.py --check_all

 # Format source code automatically and check is there are any problems left that need manual fixing
 extra_style_checks:
+	python utils/custom_init_isort.py
 	python utils/sort_auto_mappings.py
 	python utils/check_doc_toc.py --fix_and_overwrite
@@ -81,7 +81,6 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
 fix-copies:
 	python utils/check_copies.py --fix_and_overwrite
-	python utils/check_modular_conversion.py --fix_and_overwrite
 	python utils/check_table.py --fix_and_overwrite
 	python utils/check_dummies.py --fix_and_overwrite
 	python utils/check_doctest_list.py --fix_and_overwrite
View File
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<b>English</b> | <b>English</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -128,10 +126,10 @@ incredible projects built in the vicinity of transformers.
If you own or use a project that you believe should be part of the list, please open a PR to add it! If you own or use a project that you believe should be part of the list, please open a PR to add it!
## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub. ## If you are looking for custom support from the Hugging Face team
<a target="_blank" href="https://huggingface.co/enterprise"> <a target="_blank" href="https://huggingface.co/support">
<img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925"> <img alt="HuggingFace Expert Acceleration Program" src="https://cdn-media.huggingface.co/marketing/transformers/new-support-improved.png" style="max-width: 600px; border: 1px solid #eee; border-radius: 4px; box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);">
</a><br> </a><br>
## Quick tour ## Quick tour
@ -249,43 +247,23 @@ The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/sta
### With pip ### With pip
This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+. This repository is tested on Python 3.8+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
First, create a virtual environment with the version of Python you're going to use and activate it. First, create a virtual environment with the version of Python you're going to use and activate it.
**macOS/Linux** Then, you will need to install at least one of Flax, PyTorch, or TensorFlow.
Please refer to [TensorFlow installation page](https://www.tensorflow.org/install/), [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) installation pages regarding the specific installation command for your platform.
```python -m venv env
source env/bin/activate
```
**Windows**
``` python -m venv env
env\Scripts\activate
```
To use 🤗 Transformers, you must install at least one of Flax, PyTorch, or TensorFlow. Refer to the official installation guides for platform-specific commands:
[TensorFlow installation page](https://www.tensorflow.org/install/),
[PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation)
When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows: When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows:
``` ```bash
pip install transformers pip install transformers
``` ```
If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source). If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source).
```
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install .
```
### With conda ### With conda
🤗 Transformers can be installed using conda as follows: 🤗 Transformers can be installed using conda as follows:
View File
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<b>Deutsch</b> | <b>Deutsch</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -246,7 +244,7 @@ Das Modell selbst ist ein reguläres [PyTorch `nn.Module`](https://pytorch.org/d
### Mit pip ### Mit pip
Dieses Repository wurde mit Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ und TensorFlow 2.6+ getestet. Dieses Repository wurde mit Python 3.8+, Flax 0.4.1+, PyTorch 1.11+ und TensorFlow 2.6+ getestet.
Sie sollten 🤗 Transformers in einer [virtuellen Umgebung](https://docs.python.org/3/library/venv.html) installieren. Wenn Sie mit virtuellen Python-Umgebungen nicht vertraut sind, schauen Sie sich den [Benutzerleitfaden](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/) an. Sie sollten 🤗 Transformers in einer [virtuellen Umgebung](https://docs.python.org/3/library/venv.html) installieren. Wenn Sie mit virtuellen Python-Umgebungen nicht vertraut sind, schauen Sie sich den [Benutzerleitfaden](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/) an.
View File
@ -31,20 +31,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<b>Español</b> | <b>Español</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -222,7 +220,7 @@ El modelo en si es un [Pytorch `nn.Module`](https://pytorch.org/docs/stable/nn.h
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<b>Français</b> | <b>Français</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -243,7 +241,7 @@ Le modèle lui-même est un module [`nn.Module` PyTorch](https://pytorch.org/doc
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -56,20 +56,18 @@ checkpoint: जाँच बिंदु
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<b>हिन्दी</b> | <b>हिन्दी</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -198,7 +196,7 @@ checkpoint: जाँच बिंदु
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You can install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're not yet familiar with Python virtual environments, please read the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -66,20 +66,18 @@ user: ユーザ
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<b>日本語</b> | <b>日本語</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -256,7 +254,7 @@ Hugging Faceチームによって作られた **[トランスフォーマーを
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

🤗 Transformers should be installed in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -15,15 +15,10 @@ limitations under the License.
-->

<p align="center">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-dark.svg">
<source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-light.svg">
<img alt="Hugging Face Transformers Library" src="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/transformers-logo-light.svg" width="352" height="59" style="max-width: 100%;">
</picture>
<br/>
<br/>
</p>

<p align="center">
<a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
<a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
@ -36,39 +31,30 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<b>한국어</b> | <b>한국어</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
<h3 align="center"> <h3 align="center">
<p> Jax, Pytorch, TensorFlow를 위한 최첨단 머신러닝</p> <p> Jax, Pytorch, TensorFlow를 위한 최첨단 자연어처리</p>
</h3> </h3>
<h3 align="center"> <h3 align="center">
<a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a> <a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a>
</h3> </h3>
🤗 Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio.

These models can be applied on:

- 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, and text generation, in over 100 languages.
- 🖼️ Images, for tasks like image classification, object detection, and segmentation.
- 🗣️ Audio, for tasks like speech recognition and audio classification.

Transformer models can also perform tasks on **several modalities combined**, such as table question answering, optical character recognition, information extraction from scanned documents, video classification, and visual question answering.

🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets, and then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each Python module defining an architecture is fully standalone and can be modified to enable quick research experiments.
@ -76,11 +62,9 @@ Transformer의 모델은 표를 통한 질의응답(Table QA), 광학 문자 인
## Online demos

You can test most of our models directly on their pages on the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, and an inference API](https://huggingface.co/pricing) for public and private models.

Here are a few examples:

In Natural Language Processing:
- [Masked word completion with BERT](https://huggingface.co/google-bert/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France)
- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city)
- [Text generation with GPT-2](https://huggingface.co/openai-community/gpt2?text=A+long+time+ago%2C+)
@ -89,100 +73,45 @@ Transformer의 모델은 표를 통한 질의응답(Table QA), 광학 문자 인
- [Question answering with DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species)
- [Translation with T5](https://huggingface.co/google-t5/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin)
In Computer Vision:
- [Image classification with ViT](https://huggingface.co/google/vit-base-patch16-224)
- [Object detection with DETR](https://huggingface.co/facebook/detr-resnet-50)
- [Semantic segmentation with SegFormer](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512)
- [Panoptic segmentation with Mask2Former](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic)
- [Depth estimation with Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything)
- [Video classification with VideoMAE](https://huggingface.co/docs/transformers/model_doc/videomae)
- [Universal segmentation with OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_dinat_large)

In Audio:
- [Automatic speech recognition with Whisper](https://huggingface.co/openai/whisper-large-v3)
- [Keyword spotting with Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
- [Audio classification with Audio Spectrogram Transformer](https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593)

In Multimodal tasks:
- [Table question answering with TAPAS](https://huggingface.co/google/tapas-base-finetuned-wtq)
- [Visual question answering with ViLT](https://huggingface.co/dandelin/vilt-b32-finetuned-vqa)
- [Image captioning with LLaVa](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- [Zero-shot image classification with SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384)
- [Document question answering with LayoutLM](https://huggingface.co/impira/layoutlm-document-qa)
- [Zero-shot video classification with X-CLIP](https://huggingface.co/docs/transformers/model_doc/xclip)
- [Zero-shot object detection with OWLv2](https://huggingface.co/docs/transformers/en/model_doc/owlv2)
- [Zero-shot image segmentation with CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)
- [Automatic mask generation with SAM](https://huggingface.co/docs/transformers/model_doc/sam)
**[Write With Transformer](https://transformer.huggingface.co)**, built by the Hugging Face team, is the official demo of this repository's text generation capabilities.

## 100 projects using Transformers

Transformers is more than a toolkit to use pretrained models: it's a community of projects built around it and the Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone else to build their dream projects.

In order to celebrate the 100,000 stars of Transformers, we decided to put the spotlight on the community and created the [awesome-transformers](https://github.com/huggingface/transformers/blob/main/awesome-transformers.md) page, which lists 100 incredible projects built with Transformers.

If you own or use a project that you believe should be part of the list, please open a PR to add it!

## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub.

<a target="_blank" href="https://huggingface.co/enterprise">
<img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
</a><br>
## Quick tour

To immediately use a model on a given input (text, image, audio, ...), we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model's training. Here is how to quickly use a pipeline to classify positive versus negative texts:

```python
>>> from transformers import pipeline

# Allocate a pipeline for sentiment-analysis
>>> classifier = pipeline('sentiment-analysis')
>>> classifier('We are very happy to introduce pipeline to the transformers repository.')
[{'label': 'POSITIVE', 'score': 0.9996980428695679}]
```

The second line of code downloads and caches the pretrained model used by the pipeline, while the third evaluates it on the given text. Here, the model judged the text positive with a confidence of 99.97%.
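By default the pipeline picks a reference checkpoint for the task; you can also pin the exact model you want from the Hub. A minimal sketch (the checkpoint name here is only an example of a sentiment model):

```python
>>> from transformers import pipeline

# Pinning a specific checkpoint avoids depending on the pipeline's default choice
>>> classifier = pipeline('sentiment-analysis', model='distilbert/distilbert-base-uncased-finetuned-sst-2-english')
>>> classifier('We are very happy to introduce pipeline to the transformers repository.')
[{'label': 'POSITIVE', 'score': 0.9996980428695679}]
```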
Many tasks have a ready-to-use pretrained `pipeline`, in computer vision and speech as well as NLP. For example, we can easily detect the objects in an image:
``` python
>>> import requests
>>> from PIL import Image
>>> from transformers import pipeline

# Download an image with cute cats
>>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
>>> image_data = requests.get(url, stream=True).raw
>>> image = Image.open(image_data)

# Allocate a pipeline for object detection
>>> object_detector = pipeline('object-detection')
>>> object_detector(image)
[{'score': 0.9982201457023621,
  'label': 'remote',
  'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},
 {'score': 0.9960021376609802,
  'label': 'remote',
  'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},
 {'score': 0.9954745173454285,
  'label': 'couch',
  'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},
 {'score': 0.9988006353378296,
  'label': 'cat',
  'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},
 {'score': 0.9986783862113953,
  'label': 'cat',
  'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]
```
Here we get a list of objects detected in the image, with a box surrounding each object and a confidence score. The original image is on the left, with the detections visualized on the right:

<h3 align="center">
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png" width="400"></a>
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample_post_processed.png" width="400"></a>
</h3>

You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/docs/transformers/ko/task_summary).
In addition to `pipeline`, you can download and use any of the pretrained models for your task with just three lines of code. Here is the PyTorch version:

```python
@ -207,24 +136,24 @@ Transformers에 달린 100,000개의 별을 축하하기 위해, 우리는 커
The tokenizer is responsible for all the preprocessing the pretrained model expects and can be called directly on a single string (as in the examples above) or a list. It outputs a dictionary that you can use in downstream code or simply pass directly to your model using the ** argument unpacking operator.

The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) that you can use as usual. [This tutorial](https://huggingface.co/docs/transformers/ko/training) explains how to integrate such a model into a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune it on a new dataset.
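The code block elided above follows the usual `AutoTokenizer`/`AutoModel` pattern; here is a minimal sketch of it (the checkpoint name is illustrative):

```python
>>> from transformers import AutoTokenizer, AutoModel

>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
>>> model = AutoModel.from_pretrained("google-bert/bert-base-uncased")

# The tokenizer returns a dict, so ** unpacking feeds it straight to the model
>>> inputs = tokenizer("Hello world!", return_tensors="pt")
>>> outputs = model(**inputs)
```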
## Why should I use transformers?

1. Easy-to-use state-of-the-art models:
    - High performance on natural language understanding & generation, computer vision, and audio tasks.
    - Low barrier to entry for educators and practitioners.
    - Few user-facing abstractions with just three classes to learn.
    - A unified API for using all our pretrained models.

1. Lower compute costs, smaller carbon footprint:
    - Researchers can share trained models instead of always retraining.
    - Practitioners can reduce compute time and production costs.
    - Dozens of architectures with over 400,000 pretrained models across all modalities.

1. Choose the right framework for every part of a model's lifetime:
    - Train state-of-the-art models in 3 lines of code.
    - Move a single model between TF2.0/PyTorch/JAX frameworks at will.
    - Seamlessly pick the right framework for training, evaluation, and production.

1. Easily customize a model or an example to your needs:
@ -235,14 +164,14 @@ Transformers에 달린 100,000개의 별을 축하하기 위해, 우리는 커
## Why shouldn't I use transformers?

- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is deliberately kept free of extra abstractions, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files.
- The training API is not intended to work on any model but is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library (possibly [Accelerate](https://huggingface.co/docs/accelerate/index)).
- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/main/examples) are just that: examples. It is expected that they won't work out of the box on your specific problem and that you will need to change a few lines of code to adapt them to your needs.

## Installation

### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -257,7 +186,7 @@ Transformers에 달린 100,000개의 별을 축하하기 위해, 우리는 커
pip install transformers
```
If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/ko/installation#install-from-source).
### With conda
@ -271,30 +200,29 @@ conda install conda-forge::transformers
Follow the installation pages of Flax, PyTorch or TensorFlow to see how to install them with conda.

> **_NOTE:_** On Windows, you may be prompted to activate Developer Mode in order to benefit from caching. If this is not an option for you, please let us know in [this issue](https://github.com/huggingface/huggingface_hub/issues/1062).
## Model architectures

**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co/models), where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations).

Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen)

🤗 Transformers currently provides the following architectures: see [here](https://huggingface.co/docs/transformers/ko/model_summary) for a high-level summary of each of them.

To check whether each model has an implementation in Flax, PyTorch or TensorFlow, or has an associated tokenizer backed by the 🤗 Tokenizers library, refer to [this table](https://huggingface.co/docs/transformers/ko/index#supported-framework).

These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations. You can find more details on performance in the Examples section of the [documentation](https://github.com/huggingface/transformers/tree/main/examples).
## Learn more

| Section | Description |
|-|-|
| [Documentation](https://huggingface.co/transformers/ko/) | Full API documentation and tutorials |
| [Task summary](https://huggingface.co/docs/transformers/ko/task_summary) | Tasks supported by 🤗 Transformers |
| [Preprocessing tutorial](https://huggingface.co/docs/transformers/ko/preprocessing) | Using the `Tokenizer` class to prepare data for the models |
| [Training and fine-tuning](https://huggingface.co/docs/transformers/ko/training) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and with the `Trainer` API |
| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/main/examples) | Example scripts for fine-tuning models on a wide range of tasks |
| [Model sharing and uploading](https://huggingface.co/docs/transformers/ko/model_sharing) | Upload and share your fine-tuned models with the community |

## Citation
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<b>Рortuguês</b> | <b>Рortuguês</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -253,7 +251,7 @@ O modelo em si é um [Pytorch `nn.Module`](https://pytorch.org/docs/stable/nn.ht
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<b>Русский</b> | <b>Русский</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
<p> <p>
</h4> </h4>
@ -77,7 +75,7 @@ limitations under the License.
## Online demos

Most of our models can be tested directly on their pages on the [website](https://huggingface.co/models). We also offer [private model hosting, versioning, and an inference API](https://huggingface.co/pricing) for public and private models.

Here are a few examples:
@ -244,7 +242,7 @@ Hugging Face Hub. Мы хотим, чтобы Transformers позволил ра
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -38,20 +38,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<b>తెలుగు</b> | <b>తెలుగు</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -246,7 +244,7 @@ limitations under the License.
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -36,20 +36,18 @@ limitations under the License.
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<b>Tiếng việt</b> | <b>Tiếng việt</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -245,7 +243,7 @@ Chính mô hình là một [Pytorch `nn.Module`](https://pytorch.org/docs/stable
### With pip

This library is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -57,19 +57,17 @@ checkpoint: 检查点
<p>
<a href="https://github.com/huggingface/transformers/">English</a> |
<b>简体中文</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hant.md">繁體中文</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p>
</h4>
@ -198,7 +196,7 @@ checkpoint: 检查点
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You can install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're not yet familiar with Python virtual environments, please read this [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -68,20 +68,18 @@ user: 使用者
<h4 align="center"> <h4 align="center">
<p> <p>
<a href="https://github.com/huggingface/transformers/">English</a> | <a href="https://github.com/huggingface/transformers/">English</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_zh-hans.md">简体中文</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_zh-hans.md">简体中文</a> |
<b>繁體中文</b> | <b>繁體中文</b> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ko.md">한국어</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ko.md">한국어</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_es.md">Español</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_es.md">Español</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ja.md">日本語</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_hd.md">हिन्दी</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_ru.md">Русский</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Рortuguês</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_pt-br.md">Рortuguês</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> | <a href="https://github.com/huggingface/transformers/blob/main/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
</p> </p>
</h4> </h4>
@ -210,7 +208,7 @@ Tokenizer 為所有的預訓練模型提供了預處理,並可以直接轉換
### With pip

This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+ and TensorFlow 2.6+.

You can install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're not yet familiar with Python virtual environments, please read this [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
@ -36,4 +36,5 @@ Please inspect the code of the tools before passing them to the Agent to protect
## Reporting a Vulnerability

Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software. 🤗

View File

@ -596,7 +596,7 @@ Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active
## [BentoML](https://github.com/bentoml/BentoML) ## [BentoML](https://github.com/bentoml/BentoML)
[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models. [BentoML](https://github.com/bentoml) is the unified framework for for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.
All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage. All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.
Keywords: BentoML, Framework, Deployment, AI Applications Keywords: BentoML, Framework, Deployment, AI Applications

View File

@ -1,49 +0,0 @@
# Benchmarks
You might want to add new benchmarks.
To do so, define a Python function named `run_benchmark` in a Python file located in this `benchmark/` directory.
The expected function signature is the following:
```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```
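For illustration, a minimal benchmark file that satisfies this contract could look like the sketch below; the file name and the logged message are hypothetical, and a real benchmark would also record metrics as described in the next section.

```py
# benchmark/my_benchmark.py -- hypothetical file name, shown only as a sketch
from logging import Logger


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    # The entrypoint discovers this function by name and passes in the repository metadata.
    logger.info(f"running my_benchmark on {branch}@{commit_id}: {commit_msg}")
    # ... load a model, time it, and record the results (see below) ...
```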
## Writing metrics to the database
`MetricsRecorder` is thread-safe with respect to the Python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread) API. This means you can start a background thread to take the device measurement readings without blocking the main thread that executes the model measurements.
See [`llama.py`](./llama.py) for an example of this in practice.
```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
# To collect device measurements
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
# To collect your model measurements
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
```
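Putting the two together, a rough sketch of a benchmark that polls device metrics from a background thread while the main thread times the model is shown below. It mirrors the structure of `llama.py`, assumes the `metrics` Postgres database used by the entrypoint is reachable, leaves the GPU readings at zero for brevity (use `gpustat` as in `llama.py` for real values), and uses illustrative metadata.

```py
from threading import Event, Thread
from time import sleep

import psutil
import psycopg2
from benchmarks_entrypoint import MetricsRecorder


def run_benchmark(logger, branch, commit_id, commit_msg, num_tokens_to_generate=100):
    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    benchmark_id = metrics_recorder.initialise_benchmark({"model_id": "my-model"})  # illustrative metadata
    stop = Event()

    def poll_device():
        process = psutil.Process()
        while not stop.is_set():
            # GPU utilisation/memory are set to 0 here; see llama.py for gpustat-based readings
            metrics_recorder.collect_device_measurements(
                benchmark_id, process.cpu_percent(), process.memory_info().rss / (1024 * 1024), 0, 0
            )
            sleep(0.01)

    thread = Thread(target=poll_device)
    thread.start()
    try:
        # ... run and time the model here, then call metrics_recorder.collect_model_measurements(...) ...
        pass
    finally:
        stop.set()
        thread.join()
        metrics_recorder.close()
```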

View File

@ -101,7 +101,7 @@ def summarize(run_dir, metrics, expand_metrics=False):
# post-processing of report: show a few selected/important metric # post-processing of report: show a few selected/important metric
for metric in metrics: for metric in metrics:
keys = metric.split(".") keys = metric.split(".")
value = report.to_dict() value = report
current = metrics_values current = metrics_values
for key in keys: for key in keys:
# Avoid KeyError when a user's specified metric has typo. # Avoid KeyError when a user's specified metric has typo.
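In other words, a dotted metric name is resolved by walking the nested report dictionary one key at a time, skipping metrics whose keys do not exist. A self-contained illustration (the metric names and values below are made up):

```py
# Illustrative only: resolving a dotted metric name against a nested report dict
report = {"decode": {"latency": {"mean": 0.021}}}


def lookup(report: dict, metric: str):
    value = report
    for key in metric.split("."):
        # Avoid a KeyError when the requested metric has a typo
        if not isinstance(value, dict) or key not in value:
            return None
        value = value[key]
    return value


print(lookup(report, "decode.latency.mean"))  # 0.021
print(lookup(report, "decode.latency.p90"))   # None
```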

View File

@ -1,144 +0,0 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
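# Make psycopg2 serialize Python dicts as JSON when they are passed as query parameters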
register_adapter(dict, Json)
class ImportModuleException(Exception):
pass
class MetricsRecorder:
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
self.conn = connection
self.conn.autocommit = True
self.logger = logger
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg
def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
"""
Creates a new benchmark, returns the benchmark id
"""
# gpu_name: str, model_id: str
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(self.branch, self.commit_id, self.commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}")
return benchmark_id
def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
"""
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
"""
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)
def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
with self.conn.cursor() as cur:
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
measurements,
),
)
self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
def close(self):
self.conn.close()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
def parse_arguments():
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers repository.")
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
def import_from_path(module_name, file_path):
try:
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
except Exception as e:
raise ImportModuleException(f"failed to load python module: {e}")
if __name__ == "__main__":
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
branch, commit_id, commit_msg = parse_arguments()
for entry in os.scandir(benchmarks_folder_path):
try:
if not entry.name.endswith(".py"):
continue
if entry.path == __file__:
continue
logger.debug(f"loading: {entry.name}")
module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"runnning benchmarks in: {entry.name}")
module.run_benchmark(logger, branch, commit_id, commit_msg)
except ImportModuleException as e:
logger.error(e)
except Exception as e:
logger.error(f"error running benchmarks for {entry.name}: {e}")

View File

@ -1,10 +0,0 @@
apiVersion: 1
providers:
- name: 'Transformers Benchmarks'
orgId: 1
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboards

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +0,0 @@
apiVersion: 1
datasources:
- name: grafana-postgresql-datasource
uid: be28nkzirtb0gd
type: postgres
url: $GRAFANA_POSTGRES_DATASOURCE_URL
user: $GRAFANA_POSTGRES_DATASOURCE_USER
secureJsonData:
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
jsonData:
database: metrics
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
connMaxLifetime: 14400
postgresVersion: 1000
timescaledb: false

View File

@ -1,33 +0,0 @@
CREATE TABLE IF NOT EXISTS benchmarks (
benchmark_id SERIAL PRIMARY KEY,
branch VARCHAR(255),
commit_id VARCHAR(72),
commit_message VARCHAR(70),
metadata jsonb,
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id);
CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch);
CREATE TABLE IF NOT EXISTS device_measurements (
measurement_id SERIAL PRIMARY KEY,
benchmark_id int REFERENCES benchmarks (benchmark_id),
cpu_util double precision,
mem_megabytes double precision,
gpu_util double precision,
gpu_mem_megabytes double precision,
time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
CREATE INDEX IF NOT EXISTS device_measurements_branch_idx ON device_measurements (benchmark_id);
CREATE TABLE IF NOT EXISTS model_measurements (
measurement_id SERIAL PRIMARY KEY,
benchmark_id int REFERENCES benchmarks (benchmark_id),
measurements jsonb,
time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);
CREATE INDEX IF NOT EXISTS model_measurements_branch_idx ON model_measurements (benchmark_id);
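For reference, a query that pulls the most recent model measurements recorded against this schema for a given branch might look like the following sketch (the connection string and branch name are placeholders):

```py
import psycopg2

# Placeholder connection string; adjust to wherever the `metrics` database lives
with psycopg2.connect("dbname=metrics") as conn, conn.cursor() as cur:
    cur.execute(
        """
        SELECT b.benchmark_id, b.commit_id, m.measurements
        FROM benchmarks b
        JOIN model_measurements m ON m.benchmark_id = b.benchmark_id
        WHERE b.branch = %s
        ORDER BY b.created_at DESC
        LIMIT 1
        """,
        ("main",),
    )
    print(cur.fetchone())
```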

View File

@ -1,342 +0,0 @@
from logging import Logger
import os
from threading import Event, Thread
from time import perf_counter, sleep
from typing import Optional
from benchmarks_entrypoint import MetricsRecorder
import gpustat
import psutil
import psycopg2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "1"
torch.set_float32_matmul_precision("high")
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
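# Poll process CPU/RSS and GPU utilisation in a loop until the main thread signals completion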
p = psutil.Process(os.getpid())
while not continue_metric_collection.is_set():
with p.oneshot():
cpu_util = p.cpu_percent()
mem_megabytes = p.memory_info().rss / (1024 * 1024)
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_util = gpu_stats[0]["utilization.gpu"]
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
sleep(0.01)
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
continue_metric_collection = Event()
metrics_thread = None
model_id = "meta-llama/Llama-2-7b-hf"
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
try:
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_name = gpu_stats[0]["name"]
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
metrics_thread = Thread(
target=collect_metrics,
args=[benchmark_id, continue_metric_collection, metrics_recorder],
)
metrics_thread.start()
logger.info("started background thread to fetch device metrics")
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
device = "cuda"
logger.info("downloading weights")
# This is to avoid counting download in model load time measurement
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
logger.info("loading model")
start = perf_counter()
model = AutoModelForCausalLM.from_pretrained(
model_id, torch_dtype=torch.float16, generation_config=gen_config
).eval()
model.to(device)
torch.cuda.synchronize()
end = perf_counter()
model_load_time = end - start
logger.info(f"loaded model in: {model_load_time}s")
tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = "Why dogs are so cute?"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
# Specify the max length (including both the prompt and the response)
# When calling `generate` with `cache_implementation="static"` later, this is also used to create a `StaticCache` object
# with sequence length = `max_length`. The longer it is, the more you will be able to re-use it
seq_length = inputs["input_ids"].shape[1]
model.generation_config.max_length = seq_length + num_tokens_to_generate
batch_size = inputs["input_ids"].shape[0]
# Copied from the gpt-fast repo
def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
q = torch.empty_like(probs_sort).exponential_(1)
return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
logits = logits / max(temperature, 1e-5)
if top_k is not None:
v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
pivot = v.select(-1, -1).unsqueeze(-1)
logits = torch.where(logits < pivot, -float("Inf"), logits)
probs = torch.nn.functional.softmax(logits, dim=-1)
return probs
def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
probs = logits_to_probs(logits[:, -1], temperature, top_k)
idx_next = multinomial_sample_one_no_sync(probs)
return idx_next, probs
def decode_one_token(model, cur_token, cache_position, past_key_values):
logits = model(
cur_token,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)[0]
new_token = sample(logits, temperature=0.6, top_k=5)[0]
return new_token
#########
# Eager #
#########
with torch.no_grad():
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate,
)
cache_position = torch.arange(seq_length, device=device)
start = perf_counter()
model(
**inputs,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)
end = perf_counter()
first_eager_fwd_pass_time = end - start
logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s")
start = perf_counter()
output = model.generate(**inputs, do_sample=False)
end = perf_counter()
first_eager_generate_time = end - start
logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate,
)
cache_position = torch.arange(seq_length, device=device)
start = perf_counter()
model(
**inputs,
cache_position=cache_position,
past_key_values=past_key_values,
return_dict=False,
use_cache=True,
)
end = perf_counter()
second_eager_fwd_pass_time = end - start
logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s")
start = perf_counter()
model.generate(**inputs, do_sample=False)
end = perf_counter()
second_eager_generate_time = end - start
logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
torch.compiler.reset()
################
# Forward pass #
################
# `torch.compile(model, ...)` is not recommended, as it would also compile callbacks
# and the full generate loop. We recommend compiling only the forward pass for now.
# "reduce-overhead" will use cudagraphs.
generated_ids = torch.zeros(
(batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device
)
generated_ids[:, :seq_length] = inputs["input_ids"]
decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)
# model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
# TODO use decode_one_token(model, input_id.clone(), cache_position) for verification
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + num_tokens_to_generate + 10,
)
cache_position = torch.arange(seq_length, device=device)
all_generated_tokens = []
### First compile, prefill
start = perf_counter()
next_token = decode_one_token(
model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_first_token = end - start
logger.info(f"completed first compile generation in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
cache_position = torch.tensor([seq_length], device=device)
### First compile, decoding
start = perf_counter()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_second_token = end - start
logger.info(f"completed second compile generation in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
### Second compile, decoding
start = perf_counter()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
torch.cuda.synchronize()
end = perf_counter()
time_to_third_token = end - start
logger.info(f"completed third compile forward in: {time_to_first_token}s")
cache_position += 1
all_generated_tokens += next_token.clone().detach().cpu().tolist()
### Using cuda graphs decoding
start = perf_counter()
for _ in range(1, num_tokens_to_generate):
all_generated_tokens += next_token.clone().detach().cpu().tolist()
next_token = decode_one_token(
model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
)
cache_position += 1
torch.cuda.synchronize()
end = perf_counter()
mean_time_to_next_token = (end - start) / num_tokens_to_generate
logger.info(f"completed next compile generation in: {mean_time_to_next_token}s")
logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}")
####################
# Generate compile #
####################
torch.compiler.reset()
# we will not compile the full generate call, as it is too intensive; we still measure the full forward pass!
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 1st call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
torch.cuda.synchronize()
end = perf_counter()
first_compile_generate_time = end - start
logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 2nd call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
torch.cuda.synchronize()
end = perf_counter()
second_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 3rd call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
end = perf_counter()
third_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
past_key_values = StaticCache(
model.config,
batch_size=batch_size,
device=device,
dtype=torch.float16,
max_cache_len=seq_length + 128,
)
# 4th call
start = perf_counter()
output = model.generate(**inputs, past_key_values=past_key_values)
end = perf_counter()
fourth_compile_generate_time = end - start
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
except Exception as e:
logger.error(f"Caught exception: {e}")
continue_metric_collection.set()
if metrics_thread is not None:
metrics_thread.join()
metrics_recorder.close()

View File

@ -1,5 +0,0 @@
gpustat==1.1.1
psutil==6.0.0
psycopg2==2.9.9
torch>=2.4.0
hf_transfer

View File

@ -1,9 +0,0 @@
# Dockers for `transformers`
In this folder you will find various dockerfiles, and some subfolders.
- dockerfiles (e.g. `consistency.dockerfile`) present directly under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need a CPU. For example, `torch-light` is a very lightweight container (703MiB).
- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` image includes additional dependencies that allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need in order to run LLMs).
Note that in both cases you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the needs of our CI: we check out a new branch each time, so the `transformers` code is always up to date.
We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs:

View File

@ -1,16 +1,15 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
USER root USER root
ARG REF=main ARG REF=main
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs RUN apt-get update && apt-get install -y time git pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py RUN uv pip install --no-cache-dir tensorflow-cpu tf-keras
RUN uv pip install --no-cache-dir pypi-kenlm RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]"
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
RUN git lfs install RUN git lfs install
RUN pip uninstall -y transformers RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
USER root USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
USER root USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
USER root USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
USER root USER root
@ -6,6 +6,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
ENV UV_PYTHON=/usr/local/bin/python ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]" RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
RUN pip uninstall -y transformers RUN pip uninstall -y transformers

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main ARG REF=main
RUN echo ${REF} RUN echo ${REF}

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far). # to be used as arguments for docker build (so far).
ARG PYTORCH='2.6.0' ARG PYTORCH='2.3.0'
# (not always a valid torch version) # (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0' ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc. # Example: `cu102`, `cu113`, etc.
@ -26,7 +26,7 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future. # 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`. # 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions). # Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 "tensorflow_text<2.16" "tensorflow_probability<0.22" && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
RUN python3 -m pip uninstall -y flax jax RUN python3 -m pip uninstall -y flax jax
@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# For video model testing # For video model testing
RUN python3 -m pip install --no-cache-dir av==9.2.0 RUN python3 -m pip install --no-cache-dir decord av==9.2.0
# Some slow tests require bnb # Some slow tests require bnb
RUN python3 -m pip install --no-cache-dir bitsandbytes RUN python3 -m pip install --no-cache-dir bitsandbytes
@ -65,9 +65,6 @@ RUN python3 -m pip install --no-cache-dir python-Levenshtein
# For `FastSpeech2ConformerTokenizer` tokenizer # For `FastSpeech2ConformerTokenizer` tokenizer
RUN python3 -m pip install --no-cache-dir g2p-en RUN python3 -m pip install --no-cache-dir g2p-en
# For Some bitsandbytes tests
RUN python3 -m pip install --no-cache-dir einops
# When installing in editable mode, `transformers` is not recognized as a package. # When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers. # this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop RUN cd transformers && python3 setup.py develop

View File

@ -48,8 +48,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout) # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed RUN python3 -m pip uninstall -y deepspeed
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010 # Issue: https://github.com/microsoft/DeepSpeed/issues/2010
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \ # RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 # DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN python3 -m pip install -U "itsdangerous<2.1.0" RUN python3 -m pip install -U "itsdangerous<2.1.0"

View File

@ -1,4 +1,5 @@
FROM rocm/dev-ubuntu-22.04:6.2.4 FROM rocm/dev-ubuntu-22.04:6.0.2
# rocm/pytorch has no version with 2.1.0
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -10,7 +11,7 @@ RUN apt update && \
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2 RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0" RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
@ -29,5 +30,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
# this line must be added in order for python to be aware of transformers. # this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop RUN cd transformers && python3 setup.py develop
# Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either. # Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y RUN python3 -m pip uninstall py3nvml pynvml apex -y

View File

@ -1,11 +1,11 @@
FROM rocm/dev-ubuntu-22.04:6.2.4 FROM rocm/dev-ubuntu-22.04:5.6
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
ARG PYTORCH='2.5.1' ARG PYTORCH='2.1.1'
ARG TORCH_VISION='0.20.0' ARG TORCH_VISION='0.16.1'
ARG TORCH_AUDIO='2.5.0' ARG TORCH_AUDIO='2.1.1'
ARG ROCM='6.2' ARG ROCM='5.6'
RUN apt update && \ RUN apt update && \
apt install -y --no-install-recommends \ apt install -y --no-install-recommends \
@ -22,7 +22,7 @@ RUN apt update && \
apt clean && \ apt clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic>=2.0.0" RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic<2"
RUN python3 -m pip uninstall -y apex torch torchvision torchaudio RUN python3 -m pip uninstall -y apex torch torchvision torchaudio
RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir
@ -45,4 +45,4 @@ RUN cd transformers && python3 setup.py develop
RUN python3 -c "from deepspeed.launcher.runner import main" RUN python3 -c "from deepspeed.launcher.runner import main"
# Remove nvml as it is not compatible with ROCm # Remove nvml as it is not compatible with ROCm
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y RUN python3 -m pip uninstall py3nvml pynvml -y

View File

@ -1,5 +1,5 @@
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
FROM nvcr.io/nvidia/pytorch:23.11-py3 FROM nvcr.io/nvidia/pytorch:23.04-py3
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -42,12 +42,12 @@ RUN python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests. # This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests. # The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why test fail. # TODO: Find out why test fail.
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
# When installing in editable mode, `transformers` is not recognized as a package. # When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers. # this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop RUN cd transformers && python3 setup.py develop
# The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails # The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails
RUN python3 -m pip install -U --no-cache-dir "pydantic>=2.0.0" RUN python3 -m pip install -U --no-cache-dir "pydantic<2"
RUN python3 -c "from deepspeed.launcher.runner import main" RUN python3 -c "from deepspeed.launcher.runner import main"

View File

@ -34,8 +34,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout) # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed RUN python3 -m pip uninstall -y deepspeed
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010 # Issue: https://github.com/microsoft/DeepSpeed/issues/2010
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \ # RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 # DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
## For `torchdynamo` tests ## For `torchdynamo` tests

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
# If set to nothing, will install the latest version # If set to nothing, will install the latest version
ARG PYTORCH='2.6.0' ARG PYTORCH='2.3.0'
ARG TORCH_VISION='' ARG TORCH_VISION=''
ARG TORCH_AUDIO='' ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc. # Example: `cu102`, `cu113`, etc.

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -9,12 +9,12 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far). # to be used as arguments for docker build (so far).
ARG PYTORCH='2.5.1' ARG PYTORCH='2.2.1'
# Example: `cu102`, `cu113`, etc. # Example: `cu102`, `cu113`, etc.
ARG CUDA='cu118' ARG CUDA='cu118'
RUN apt update RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python python3-pip ffmpeg
RUN python3 -m pip install --no-cache-dir --upgrade pip RUN python3 -m pip install --no-cache-dir --upgrade pip
ARG REF=main ARG REF=main
@ -36,23 +36,15 @@ RUN python3 -m pip install --no-cache-dir einops
# Add bitsandbytes for mixed int8 testing # Add bitsandbytes for mixed int8 testing
RUN python3 -m pip install --no-cache-dir bitsandbytes RUN python3 -m pip install --no-cache-dir bitsandbytes
# Add auto-gptq for gptq quantization testing, installed from source for pytorch==2.5.1 compatibility # Add auto-gptq for gptq quantization testing
# TORCH_CUDA_ARCH_LIST="7.5+PTX" is added to make the package compile for Tesla T4 gpus available for the CI. RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
RUN pip install gekko
RUN git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ && TORCH_CUDA_ARCH_LIST="7.5+PTX" python3 setup.py install
# Add optimum for gptq quantization testing # Add optimum for gptq quantization testing
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# Add PEFT
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
# Add aqlm for quantization testing # Add aqlm for quantization testing
RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
# Add vptq for quantization testing
RUN python3 -m pip install --no-cache-dir vptq
# Add hqq for quantization testing # Add hqq for quantization testing
RUN python3 -m pip install --no-cache-dir hqq RUN python3 -m pip install --no-cache-dir hqq
@ -60,19 +52,15 @@ RUN python3 -m pip install --no-cache-dir hqq
RUN python3 -m pip install --no-cache-dir gguf RUN python3 -m pip install --no-cache-dir gguf
# Add autoawq for quantization testing # Add autoawq for quantization testing
# >=v0.2.7 needed for compatibility with transformers > 4.46 # >=v0.2.3 needed for compatibility with torch 2.2.1
RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.7.post2/autoawq-0.2.7.post2-py3-none-any.whl RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
# Add quanto for quantization testing # Add quanto for quantization testing
RUN python3 -m pip install --no-cache-dir optimum-quanto RUN python3 -m pip install --no-cache-dir quanto
# Add eetq for quantization testing # Add eetq for quantization testing
RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git
# Add flute-kernel and fast_hadamard_transform for quantization testing
RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1
# When installing in editable mode, `transformers` is not recognized as a package. # When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers. # this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop RUN cd transformers && python3 setup.py develop

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
LABEL maintainer="Hugging Face" LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
@ -18,7 +18,7 @@ RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' || VERSIO
RUN python3 -m pip uninstall -y torch flax RUN python3 -m pip uninstall -y torch flax
RUN python3 -m pip install -U "itsdangerous<2.1.0" RUN python3 -m pip install -U "itsdangerous<2.1.0"
RUN python3 -m pip install --no-cache-dir -U "tensorflow_probability<0.22" RUN python3 -m pip install --no-cache-dir -U tensorflow_probability
# When installing in editable mode, `transformers` is not recognized as a package. # When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers. # this line must be added in order for python to be aware of transformers.

View File

@ -276,14 +276,14 @@ building the return.
Here's an example of a single value return: Here's an example of a single value return:
```python ```
Returns: Returns:
`List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token. `List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
``` ```
Here's an example of a tuple return, comprising several objects: Here's an example of a tuple return, comprising several objects:
```python ```
Returns: Returns:
`tuple(torch.FloatTensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs: `tuple(torch.FloatTensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs:
- ** loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.FloatTensor` of shape `(1,)` -- - ** loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.FloatTensor` of shape `(1,)` --
@ -322,9 +322,10 @@ includes an example of how to transcribe speech to text in the
The syntax for Example docstrings can look as follows: The syntax for Example docstrings can look as follows:
```python ```
Example: Example:
```python
>>> from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC >>> from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> import torch >>> import torch
@ -346,6 +347,7 @@ The syntax for Example docstrings can look as follows:
>>> transcription = processor.batch_decode(predicted_ids) >>> transcription = processor.batch_decode(predicted_ids)
>>> transcription[0] >>> transcription[0]
'MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL' 'MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL'
```
``` ```
The docstring should give a minimal, clear example of how the respective model The docstring should give a minimal, clear example of how the respective model

View File

@ -1,70 +1,57 @@
# Translating the Transformers documentation into your language ### Translating the Transformers documentation into your language
As part of our mission to democratize machine learning, we aim to make the Transformers library available in many more languages! Follow the steps below to help translate the documentation into your language. As part of our mission to democratize machine learning, we'd love to make the Transformers library available in many more languages! Follow the steps below if you want to help translate the documentation into your language 🙏.
## Open an Issue **🗞️ Open an issue**
1. Navigate to the Issues page of this repository. To get started, navigate to the [Issues](https://github.com/huggingface/transformers/issues) page of this repo and check if anyone else has opened an issue for your language. If not, open a new issue by selecting the "Translation template" from the "New issue" button.
2. Check if anyone has already opened an issue for your language.
3. If not, create a new issue by selecting the "Translation template" from the "New issue" button.
4. Post a comment indicating which chapters youd like to work on, and well add your name to the list.
## Fork the Repository Once an issue exists, post a comment to indicate which chapters you'd like to work on, and we'll add your name to the list.
1. First, fork the Transformers repo by clicking the Fork button in the top-right corner.
2. Clone your fork to your local machine for editing with the following command:
```bash **🍴 Fork the repository**
git clone https://github.com/YOUR-USERNAME/transformers.git
```
Replace `YOUR-USERNAME` with your GitHub username.
## Copy-paste the English version with a new language code First, you'll need to [fork the Transformers repo](https://docs.github.com/en/get-started/quickstart/fork-a-repo). You can do this by clicking on the **Fork** button on the top-right corner of this repo's page.
The documentation files are organized in the following directory: Once you've forked the repo, you'll want to get the files on your local machine for editing. You can do that by cloning the fork with Git as follows:
- **docs/source**: This contains all documentation materials organized by language. ```bash
git clone https://github.com/YOUR-USERNAME/transformers.git
```
To copy the English version to your new language directory: **📋 Copy-paste the English version with a new language code**
1. Navigate to your fork of the repository: The documentation files are in one leading directory:
```bash - [`docs/source`](https://github.com/huggingface/transformers/tree/main/docs/source): All the documentation materials are organized here by language.
cd ~/path/to/transformers/docs
```
Replace `~/path/to` with your actual path. You'll only need to copy the files in the [`docs/source/en`](https://github.com/huggingface/transformers/tree/main/docs/source/en) directory, so first navigate to your fork of the repo and run the following:
2. Run the following command: ```bash
cd ~/path/to/transformers/docs
cp -r source/en source/LANG-ID
```
```bash Here, `LANG-ID` should be one of the ISO 639-1 or ISO 639-2 language codes -- see [here](https://www.loc.gov/standards/iso639-2/php/code_list.php) for a handy table.
cp -r source/en source/LANG-ID
```
Replace `LANG-ID` with the appropriate ISO 639-1 or ISO 639-2 language code (see [this table](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) for reference). **✍️ Start translating**
## Start translating The fun part comes - translating the text!
Begin translating the text! The first thing we recommend is translating the part of the `_toctree.yml` file that corresponds to your doc chapter. This file is used to render the table of contents on the website.
1. Start with the `_toctree.yml` file that corresponds to your documentation chapter. This file is essential for rendering the table of contents on the website. > 🙋 If the `_toctree.yml` file doesn't yet exist for your language, you can create one by copy-pasting from the English version and deleting the sections unrelated to your chapter. Just make sure it exists in the `docs/source/LANG-ID/` directory!
- If the `_toctree.yml` file doesnt exist for your language, create one by copying the English version and removing unrelated sections. The fields you should add are `local` (with the name of the file containing the translation; e.g. `autoclass_tutorial`), and `title` (with the title of the doc in your language; e.g. `Load pretrained instances with an AutoClass`) -- as a reference, here is the `_toctree.yml` for [English](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml):
- Ensure it is placed in the `docs/source/LANG-ID/` directory.
Heres an example structure for the `_toctree.yml` file: ```yaml
- sections:
- local: pipeline_tutorial # Do not change this! Use the same name for your .md file
title: Pipelines for inference # Translate this!
...
title: Tutorials # Translate this!
```
```yaml Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter.
- sections:
- local: pipeline_tutorial # Keep this name for your .md file
title: Pipelines for Inference # Translate this
...
title: Tutorials # Translate this
```
2. Once youve translated the `_toctree.yml`, move on to translating the associated MDX files. > 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu and @MKhalusova.
## Collaborate and share
If you'd like assistance with your translation, open an issue and tag `@stevhliu`. Feel free to share resources or glossaries to ensure consistent terminology.

View File

@ -1,14 +0,0 @@
# docstyle-ignore
INSTALL_CONTENT = """
# Transformers installation
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git
"""
notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
black_avoid_patterns = {
"{processor_class}": "FakeProcessorClass",
"{model_class}": "FakeModelClass",
"{object_class}": "FakeObjectClass",
}

View File

@ -1,898 +0,0 @@
- sections:
- local: index
title: 🤗 المحولات
- local: quicktour
title: جولة سريعة
- local: installation
title: التثبيت
title: البدء
- sections:
- local: pipeline_tutorial
title: تشغيل الاستنتاج باستخدام خطوط الأنابيب
- local: autoclass_tutorial
title: كتابة تعليمات برمجية متكيفه باستخدام AutoClass
- local: preprocessing
title: معالجة البيانات مسبقًا
- local: training
title: ضبط نموذج مسبق التدريب
- local: run_scripts
title: التدريب باستخدام نص برمجي
- local: accelerate
title: إعداد تدريب موزع باستخدام 🤗 Accelerate
- local: peft
title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT
- local: model_sharing
title: مشاركة نموذجك
- local: agents
title: الوكلاء
- local: llm_tutorial
title: التوليد باستخدام LLMs
- local: conversations
title: الدردشة مع المحولات
title: البرامج التعليمية
- sections:
- isExpanded: false
sections:
- local: tasks/sequence_classification
title: تصنيف النصوص
- local: tasks/token_classification
title: تصنيف الرموز
- local: tasks/question_answering
title: الإجابة على الأسئلة
- local: tasks/language_modeling
title: نمذجة اللغة السببية
- local: tasks/masked_language_modeling
title: نمذجة اللغة المقنعة
- local: tasks/translation
title: الترجمة
- local: tasks/summarization
title: التلخيص
- local: tasks/multiple_choice
title: الاختيار المتعدد
title: معالجة اللغات الطبيعية
# - isExpanded: false
# sections:
# - local: tasks/audio_classification
# title: تصنيف الصوت
# - local: tasks/asr
# title: التعرف التلقائي على الكلام
# title: الصوت
# - isExpanded: false
# sections:
# - local: tasks/image_classification
# title: تصنيف الصور
# - local: tasks/semantic_segmentation
# title: تجزئة الصور
# - local: tasks/video_classification
# title: تصنيف الفيديو
# - local: tasks/object_detection
# title: اكتشاف الأشياء
# - local: tasks/zero_shot_object_detection
# title: اكتشاف الأشياء بدون تدريب
# - local: tasks/zero_shot_image_classification
# title: تصنيف الصور بدون تدريب
# - local: tasks/monocular_depth_estimation
# title: تقدير العمق
# - local: tasks/image_to_image
# title: صورة إلى صورة
# - local: tasks/image_feature_extraction
# title: استخراج ميزات الصورة
# - local: tasks/mask_generation
# title: توليد القناع
# - local: tasks/knowledge_distillation_for_image_classification
# title: التقليل المعرفي للرؤية الحاسوبية
# title: الرؤية الحاسوبية
# - isExpanded: false
# sections:
# - local: tasks/image_captioning
# title: وصف الصور Image captioning
# - local: tasks/document_question_answering
# title: الإجابة على أسئلة المستندات
# - local: tasks/visual_question_answering
# title: الإجابة على الأسئلة المرئية
# - local: tasks/text-to-speech
# title: تحويل النص إلى كلام
# title: المتعددة الوسائط
# - isExpanded: false
# sections:
# - local: generation_strategies
# title: تخصيص استراتيجية التوليد
# - local: kv_cache
# title: أفضل الممارسات للتوليد باستخدام ذاكرة التخزين المؤقت
# title: التوليد
# - isExpanded: false
# sections:
# - local: tasks/idefics
# title: مهام الصور مع IDEFICS
# - local: tasks/prompting
# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة
# title: الإرشاد
title: أدلة المهام
- sections:
- local: fast_tokenizers
title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers
- local: multilingual
title: الاستدلال باستخدام نماذج متعددة اللغات
- local: create_a_model
title: استخدام واجهات برمجة التطبيقات الخاصة بالنموذج
- local: custom_models
title: مشاركة نموذج مخصص
- local: chat_templating
title: قوالب لنماذج الدردشة
- local: trainer
title: المدرب
- local: sagemaker
title: تشغيل التدريب على Amazon SageMaker
- local: serialization
title: التصدير إلى ONNX
- local: tflite
title: التصدير إلى TFLite
- local: torchscript
title: التصدير إلى TorchScript
- local: notebooks
title: دفاتر الملاحظات مع الأمثلة
- local: community
title: موارد المجتمع
- local: troubleshooting
title: استكشاف الأخطاء وإصلاحها
- local: gguf
title: التوافق مع ملفات GGUF
- local: tiktoken
title: التوافق مع ملفات TikToken
- local: modular_transformers
title: الوحدات النمطية في `transformers`
- local: how_to_hack_models
title: اختراق النموذج (الكتابة فوق فئة لاستخدامك)
title: أدلة المطورين
# - sections:
# - local: quantization/overview
# title: نظرة عامة
# - local: quantization/bitsandbytes
# title: bitsandbytes
# - local: quantization/gptq
# title: GPTQ
# - local: quantization/awq
# title: AWQ
# - local: quantization/aqlm
# title: AQLM
# - local: quantization/vptq
# title: VPTQ
# - local: quantization/quanto
# title: Quanto
# - local: quantization/eetq
# title: EETQ
# - local: quantization/hqq
# title: HQQ
# - local: quantization/optimum
# title: Optimum
# - local: quantization/contribute
# title: المساهمة بطريقة جديدة للتكميم
# title: أساليب التكميم
# - sections:
# - local: performance
# title: الأداء-نظرة عامة
# - local: llm_optims
# title: تحسين الاستدلال LLM
# - sections:
# - local: perf_train_gpu_one
# title: استخدام عدة وحدات معالجة رسوميات (GPUs) بشكل متوازٍ
# - local: perf_train_gpu_many
# title: وحدات معالجة الرسومات (GPU) متعددة والتوازي
# - local: fsdp
# title: Fully Sharded Data Parallel
# - local: deepspeed
# title: DeepSpeed
# - local: perf_train_cpu
# title: التدريب الفعال على وحدة المعالجة المركزية (CPU)
# - local: perf_train_cpu_many
# title: التدريب الموزع لوحدة المعالجة المركزية (CPU)
# - local: perf_train_tpu_tf
# title: التدريب على (TPU) باستخدام TensorFlow
# - local: perf_train_special
# title: تدريب PyTorch على Apple silicon
# - local: perf_hardware
# title: الأجهزة المخصصة للتدريب
# - local: hpo_train
# title: البحث عن المعاملات المثلى باستخدام واجهة برمجة تطبيقات المدرب
# title: تقنيات التدريب الفعال
# - sections:
# - local: perf_infer_cpu
# title: الإستدلال على وحدة المعالجة المركزية (CPU)
# - local: perf_infer_gpu_one
# title: الإستدلال على وحدة معالجة الرسومات (GPU)
# title: تحسين الاستدلال
# - local: big_models
# title: إنشاء نموذج كبير
# - local: debugging
# title: تصحيح الأخطاء البرمجية
# - local: tf_xla
# title: تكامل XLA لنماذج TensorFlow
# - local: perf_torch_compile
# title: تحسين الاستدلال باستخدام `torch.compile()`
# title: الأداء وقابلية التوسع
# - sections:
# - local: contributing
# title: كيفية المساهمة في 🤗 المحولات؟
# - local: add_new_model
# title: كيفية إضافة نموذج إلى 🤗 المحولات؟
# - local: add_new_pipeline
# title: كيفية إضافة خط أنابيب إلى 🤗 المحولات؟
# - local: testing
# title: الاختبار
# - local: pr_checks
# title: التحقق من طلب السحب
# title: المساهمة
- sections:
- local: philosophy
title: الفلسفة
- local: glossary
title: (قاموس المصطلحات (قائمة الكلمات
- local: task_summary
title: ما الذي يمكن أن تفعله 🤗 المحولات
- local: tasks_explained
title: كيف تحل المحولات المهام
- local: model_summary
title: عائلة نماذج المحول
- local: tokenizer_summary
title: ملخص برنامج مقسم النصوص (tokenizers)
- local: attention
title: الانتباه Attention
- local: pad_truncation
title: الحشو والتقليم
- local: bertology
title: BERTology
- local: perplexity
title: حيرة النماذج ذات الطول الثابت
- local: pipeline_webserver
title: خطوط الأنابيب للاستدلال على خادم الويب
- local: model_memory_anatomy
title: تشريح تدريب النموذج
- local: llm_tutorial_optimization
title: الاستفادة القصوى من LLMs
title: أطر مفاهيمية
# - sections:
# - sections:
# - local: main_classes/agent
# title: الوكلاء والأدوات
# - local: model_doc/auto
# title: فئات يتم إنشاؤها ديناميكيًا
# - local: main_classes/backbones
# title: العمود الفقري
# - local: main_classes/callback
# title: عمليات الاسترجاع
# - local: main_classes/configuration
# title: التكوين
# - local: main_classes/data_collator
# title: مجمع البيانات
# - local: main_classes/keras_callbacks
# title: استدعاءات Keras
# - local: main_classes/logging
# title: التسجيل
# - local: main_classes/model
# title: النماذج
# - local: main_classes/text_generation
# title: توليد النصوص
# - local: main_classes/onnx
# title: ONNX
# - local: main_classes/optimizer_schedules
# title: التحسين
# - local: main_classes/output
# title: مخرجات النموذج
# - local: main_classes/pipelines
# title: خطوط الأنابيب
# - local: main_classes/processors
# title: المعالجات
# - local: main_classes/quantization
# title: التكميم
# - local: main_classes/tokenizer
# title: برنامج مقسم النصوص
# - local: main_classes/trainer
# title: المدرب
# - local: main_classes/deepspeed
# title: DeepSpeed
# - local: main_classes/feature_extractor
# title: مستخرج الميزات
# - local: main_classes/image_processor
# title: معالج الصور
# title: الفئات الرئيسية
# - sections:
# - isExpanded: false
# sections:
# - local: model_doc/albert
# title: ALBERT
# - local: model_doc/bart
# title: BART
# - local: model_doc/barthez
# title: BARThez
# - local: model_doc/bartpho
# title: BARTpho
# - local: model_doc/bert
# title: BERT
# - local: model_doc/bert-generation
# title: BertGeneration
# - local: model_doc/bert-japanese
# title: BertJapanese
# - local: model_doc/bertweet
# title: Bertweet
# - local: model_doc/big_bird
# title: BigBird
# - local: model_doc/bigbird_pegasus
# title: BigBirdPegasus
# - local: model_doc/biogpt
# title: BioGpt
# - local: model_doc/blenderbot
# title: Blenderbot
# - local: model_doc/blenderbot-small
# title: Blenderbot Small
# - local: model_doc/bloom
# title: BLOOM
# - local: model_doc/bort
# title: BORT
# - local: model_doc/byt5
# title: ByT5
# - local: model_doc/camembert
# title: CamemBERT
# - local: model_doc/canine
# title: CANINE
# - local: model_doc/codegen
# title: CodeGen
# - local: model_doc/code_llama
# title: CodeLlama
# - local: model_doc/cohere
# title: Cohere
# - local: model_doc/convbert
# title: ConvBERT
# - local: model_doc/cpm
# title: CPM
# - local: model_doc/cpmant
# title: CPMANT
# - local: model_doc/ctrl
# title: CTRL
# - local: model_doc/dbrx
# title: DBRX
# - local: model_doc/deberta
# title: DeBERTa
# - local: model_doc/deberta-v2
# title: DeBERTa-v2
# - local: model_doc/dialogpt
# title: DialoGPT
# - local: model_doc/distilbert
# title: DistilBERT
# - local: model_doc/dpr
# title: DPR
# - local: model_doc/electra
# title: ELECTRA
# - local: model_doc/encoder-decoder
# title: Encoder Decoder Models
# - local: model_doc/ernie
# title: ERNIE
# - local: model_doc/ernie_m
# title: ErnieM
# - local: model_doc/esm
# title: ESM
# - local: model_doc/falcon
# title: Falcon
# - local: model_doc/fastspeech2_conformer
# title: FastSpeech2Conformer
# - local: model_doc/flan-t5
# title: FLAN-T5
# - local: model_doc/flan-ul2
# title: FLAN-UL2
# - local: model_doc/flaubert
# title: FlauBERT
# - local: model_doc/fnet
# title: FNet
# - local: model_doc/fsmt
# title: FSMT
# - local: model_doc/funnel
# title: Funnel Transformer
# - local: model_doc/fuyu
# title: Fuyu
# - local: model_doc/gemma
# title: Gemma
# - local: model_doc/openai-gpt
# title: GPT
# - local: model_doc/gpt_neo
# title: GPT Neo
# - local: model_doc/gpt_neox
# title: GPT NeoX
# - local: model_doc/gpt_neox_japanese
# title: GPT NeoX Japanese
# - local: model_doc/gptj
# title: GPT-J
# - local: model_doc/gpt2
# title: GPT2
# - local: model_doc/gpt_bigcode
# title: GPTBigCode
# - local: model_doc/gptsan-japanese
# title: GPTSAN Japanese
# - local: model_doc/gpt-sw3
# title: GPTSw3
# - local: model_doc/herbert
# title: HerBERT
# - local: model_doc/ibert
# title: I-BERT
# - local: model_doc/jamba
# title: Jamba
# - local: model_doc/jetmoe
# title: JetMoe
# - local: model_doc/jukebox
# title: Jukebox
# - local: model_doc/led
# title: LED
# - local: model_doc/llama
# title: LLaMA
# - local: model_doc/llama2
# title: Llama2
# - local: model_doc/llama3
# title: Llama3
# - local: model_doc/longformer
# title: Longformer
# - local: model_doc/longt5
# title: LongT5
# - local: model_doc/luke
# title: LUKE
# - local: model_doc/m2m_100
# title: M2M100
# - local: model_doc/madlad-400
# title: MADLAD-400
# - local: model_doc/mamba
# title: Mamba
# - local: model_doc/marian
# title: MarianMT
# - local: model_doc/markuplm
# title: MarkupLM
# - local: model_doc/mbart
# title: MBart and MBart-50
# - local: model_doc/mega
# title: MEGA
# - local: model_doc/megatron-bert
# title: MegatronBERT
# - local: model_doc/megatron_gpt2
# title: MegatronGPT2
# - local: model_doc/mistral
# title: Mistral
# - local: model_doc/mixtral
# title: Mixtral
# - local: model_doc/mluke
# title: mLUKE
# - local: model_doc/mobilebert
# title: MobileBERT
# - local: model_doc/mpnet
# title: MPNet
# - local: model_doc/mpt
# title: MPT
# - local: model_doc/mra
# title: MRA
# - local: model_doc/mt5
# title: MT5
# - local: model_doc/mvp
# title: MVP
# - local: model_doc/nezha
# title: NEZHA
# - local: model_doc/nllb
# title: NLLB
# - local: model_doc/nllb-moe
# title: NLLB-MoE
# - local: model_doc/nystromformer
# title: Nyströmformer
# - local: model_doc/olmo
# title: OLMo
# - local: model_doc/open-llama
# title: Open-Llama
# - local: model_doc/opt
# title: OPT
# - local: model_doc/pegasus
# title: Pegasus
# - local: model_doc/pegasus_x
# title: PEGASUS-X
# - local: model_doc/persimmon
# title: Persimmon
# - local: model_doc/phi
# title: Phi
# - local: model_doc/phi3
# title: Phi-3
# - local: model_doc/phobert
# title: PhoBERT
# - local: model_doc/plbart
# title: PLBart
# - local: model_doc/prophetnet
# title: ProphetNet
# - local: model_doc/qdqbert
# title: QDQBert
# - local: model_doc/qwen2
# title: Qwen2
# - local: model_doc/qwen2_moe
# title: Qwen2MoE
# - local: model_doc/rag
# title: RAG
# - local: model_doc/realm
# title: REALM
# - local: model_doc/recurrent_gemma
# title: RecurrentGemma
# - local: model_doc/reformer
# title: Reformer
# - local: model_doc/rembert
# title: RemBERT
# - local: model_doc/retribert
# title: RetriBERT
# - local: model_doc/roberta
# title: RoBERTa
# - local: model_doc/roberta-prelayernorm
# title: RoBERTa-PreLayerNorm
# - local: model_doc/roc_bert
# title: RoCBert
# - local: model_doc/roformer
# title: RoFormer
# - local: model_doc/rwkv
# title: RWKV
# - local: model_doc/splinter
# title: Splinter
# - local: model_doc/squeezebert
# title: SqueezeBERT
# - local: model_doc/stablelm
# title: StableLm
# - local: model_doc/starcoder2
# title: Starcoder2
# - local: model_doc/switch_transformers
# title: SwitchTransformers
# - local: model_doc/t5
# title: T5
# - local: model_doc/t5v1.1
# title: T5v1.1
# - local: model_doc/tapex
# title: TAPEX
# - local: model_doc/transfo-xl
# title: Transformer XL
# - local: model_doc/ul2
# title: UL2
# - local: model_doc/umt5
# title: UMT5
# - local: model_doc/xmod
# title: X-MOD
# - local: model_doc/xglm
# title: XGLM
# - local: model_doc/xlm
# title: XLM
# - local: model_doc/xlm-prophetnet
# title: XLM-ProphetNet
# - local: model_doc/xlm-roberta
# title: XLM-RoBERTa
# - local: model_doc/xlm-roberta-xl
# title: XLM-RoBERTa-XL
# - local: model_doc/xlm-v
# title: XLM-V
# - local: model_doc/xlnet
# title: XLNet
# - local: model_doc/yoso
# title: YOSO
# title: Text models
# - isExpanded: false
# sections:
# - local: model_doc/beit
# title: BEiT
# - local: model_doc/bit
# title: BiT
# - local: model_doc/conditional_detr
# title: Conditional DETR
# - local: model_doc/convnext
# title: ConvNeXT
# - local: model_doc/convnextv2
# title: ConvNeXTV2
# - local: model_doc/cvt
# title: CVT
# - local: model_doc/deformable_detr
# title: Deformable DETR
# - local: model_doc/deit
# title: DeiT
# - local: model_doc/depth_anything
# title: Depth Anything
# - local: model_doc/deta
# title: DETA
# - local: model_doc/detr
# title: DETR
# - local: model_doc/dinat
# title: DiNAT
# - local: model_doc/dinov2
# title: DINOV2
# - local: model_doc/dit
# title: DiT
# - local: model_doc/dpt
# title: DPT
# - local: model_doc/efficientformer
# title: EfficientFormer
# - local: model_doc/efficientnet
# title: EfficientNet
# - local: model_doc/focalnet
# title: FocalNet
# - local: model_doc/glpn
# title: GLPN
# - local: model_doc/imagegpt
# title: ImageGPT
# - local: model_doc/levit
# title: LeViT
# - local: model_doc/mask2former
# title: Mask2Former
# - local: model_doc/maskformer
# title: MaskFormer
# - local: model_doc/mobilenet_v1
# title: MobileNetV1
# - local: model_doc/mobilenet_v2
# title: MobileNetV2
# - local: model_doc/mobilevit
# title: MobileViT
# - local: model_doc/mobilevitv2
# title: MobileViTV2
# - local: model_doc/nat
# title: NAT
# - local: model_doc/poolformer
# title: PoolFormer
# - local: model_doc/pvt
# title: Pyramid Vision Transformer (PVT)
# - local: model_doc/pvt_v2
# title: Pyramid Vision Transformer v2 (PVTv2)
# - local: model_doc/regnet
# title: RegNet
# - local: model_doc/resnet
# title: ResNet
# - local: model_doc/segformer
# title: SegFormer
# - local: model_doc/seggpt
# title: SegGpt
# - local: model_doc/superpoint
# title: SuperPoint
# - local: model_doc/swiftformer
# title: SwiftFormer
# - local: model_doc/swin
# title: Swin Transformer
# - local: model_doc/swinv2
# title: Swin Transformer V2
# - local: model_doc/swin2sr
# title: Swin2SR
# - local: model_doc/table-transformer
# title: Table Transformer
# - local: model_doc/upernet
# title: UperNet
# - local: model_doc/van
# title: VAN
# - local: model_doc/vit
# title: Vision Transformer (ViT)
# - local: model_doc/vit_hybrid
# title: ViT Hybrid
# - local: model_doc/vitdet
# title: ViTDet
# - local: model_doc/vit_mae
# title: ViTMAE
# - local: model_doc/vitmatte
# title: ViTMatte
# - local: model_doc/vit_msn
# title: ViTMSN
# - local: model_doc/yolos
# title: YOLOS
# title: Vision models
# - isExpanded: false
# sections:
# - local: model_doc/audio-spectrogram-transformer
# title: Audio Spectrogram Transformer
# - local: model_doc/bark
# title: Bark
# - local: model_doc/clap
# title: CLAP
# - local: model_doc/encodec
# title: EnCodec
# - local: model_doc/hubert
# title: Hubert
# - local: model_doc/mctct
# title: MCTCT
# - local: model_doc/mms
# title: MMS
# - local: model_doc/musicgen
# title: MusicGen
# - local: model_doc/musicgen_melody
# title: MusicGen Melody
# - local: model_doc/pop2piano
# title: Pop2Piano
# - local: model_doc/seamless_m4t
# title: Seamless-M4T
# - local: model_doc/seamless_m4t_v2
# title: SeamlessM4T-v2
# - local: model_doc/sew
# title: SEW
# - local: model_doc/sew-d
# title: SEW-D
# - local: model_doc/speech_to_text
# title: Speech2Text
# - local: model_doc/speech_to_text_2
# title: Speech2Text2
# - local: model_doc/speecht5
# title: SpeechT5
# - local: model_doc/unispeech
# title: UniSpeech
# - local: model_doc/unispeech-sat
# title: UniSpeech-SAT
# - local: model_doc/univnet
# title: UnivNet
# - local: model_doc/vits
# title: VITS
# - local: model_doc/wav2vec2
# title: Wav2Vec2
# - local: model_doc/wav2vec2-bert
# title: Wav2Vec2-BERT
# - local: model_doc/wav2vec2-conformer
# title: Wav2Vec2-Conformer
# - local: model_doc/wav2vec2_phoneme
# title: Wav2Vec2Phoneme
# - local: model_doc/wavlm
# title: WavLM
# - local: model_doc/whisper
# title: Whisper
# - local: model_doc/xls_r
# title: XLS-R
# - local: model_doc/xlsr_wav2vec2
# title: XLSR-Wav2Vec2
# title: Audio models
# - isExpanded: false
# sections:
# - local: model_doc/timesformer
# title: TimeSformer
# - local: model_doc/videomae
# title: VideoMAE
# - local: model_doc/vivit
# title: ViViT
# title: Video models
# - isExpanded: false
# sections:
# - local: model_doc/align
# title: ALIGN
# - local: model_doc/altclip
# title: AltCLIP
# - local: model_doc/blip
# title: BLIP
# - local: model_doc/blip-2
# title: BLIP-2
# - local: model_doc/bridgetower
# title: BridgeTower
# - local: model_doc/bros
# title: BROS
# - local: model_doc/chinese_clip
# title: Chinese-CLIP
# - local: model_doc/clip
# title: CLIP
# - local: model_doc/clipseg
# title: CLIPSeg
# - local: model_doc/clvp
# title: CLVP
# - local: model_doc/data2vec
# title: Data2Vec
# - local: model_doc/deplot
# title: DePlot
# - local: model_doc/donut
# title: Donut
# - local: model_doc/flava
# title: FLAVA
# - local: model_doc/git
# title: GIT
# - local: model_doc/grounding-dino
# title: Grounding DINO
# - local: model_doc/groupvit
# title: GroupViT
# - local: model_doc/idefics
# title: IDEFICS
# - local: model_doc/idefics2
# title: Idefics2
# - local: model_doc/instructblip
# title: InstructBLIP
# - local: model_doc/kosmos-2
# title: KOSMOS-2
# - local: model_doc/layoutlm
# title: LayoutLM
# - local: model_doc/layoutlmv2
# title: LayoutLMV2
# - local: model_doc/layoutlmv3
# title: LayoutLMV3
# - local: model_doc/layoutxlm
# title: LayoutXLM
# - local: model_doc/lilt
# title: LiLT
# - local: model_doc/llava
# title: Llava
# - local: model_doc/llava_next
# title: LLaVA-NeXT
# - local: model_doc/lxmert
# title: LXMERT
# - local: model_doc/matcha
# title: MatCha
# - local: model_doc/mgp-str
# title: MGP-STR
# - local: model_doc/nougat
# title: Nougat
# - local: model_doc/oneformer
# title: OneFormer
# - local: model_doc/owlvit
# title: OWL-ViT
# - local: model_doc/owlv2
# title: OWLv2
# - local: model_doc/paligemma
# title: PaliGemma
# - local: model_doc/perceiver
# title: Perceiver
# - local: model_doc/pix2struct
# title: Pix2Struct
# - local: model_doc/sam
# title: Segment Anything
# - local: model_doc/siglip
# title: SigLIP
# - local: model_doc/speech-encoder-decoder
# title: Speech Encoder Decoder Models
# - local: model_doc/tapas
# title: TAPAS
# - local: model_doc/trocr
# title: TrOCR
# - local: model_doc/tvlt
# title: TVLT
# - local: model_doc/tvp
# title: TVP
# - local: model_doc/udop
# title: UDOP
# - local: model_doc/video_llava
# title: VideoLlava
# - local: model_doc/vilt
# title: ViLT
# - local: model_doc/vipllava
# title: VipLlava
# - local: model_doc/vision-encoder-decoder
# title: Vision Encoder Decoder Models
# - local: model_doc/vision-text-dual-encoder
# title: Vision Text Dual Encoder
# - local: model_doc/visual_bert
# title: VisualBERT
# - local: model_doc/xclip
# title: X-CLIP
# title: Multimodal models
# - isExpanded: false
# sections:
# - local: model_doc/decision_transformer
# title: محول القرار
# - local: model_doc/trajectory_transformer
# title: محول المسار
# title: نماذج التعلم التعزيزية
# - isExpanded: false
# sections:
# - local: model_doc/autoformer
# title: Autoformer
# - local: model_doc/informer
# title: Informer
# - local: model_doc/patchtsmixer
# title: PatchTSMixer
# - local: model_doc/patchtst
# title: PatchTST
# - local: model_doc/time_series_transformer
# title: محول السلاسل الزمنية
# title: نماذج السلاسل الزمنية
# - isExpanded: false
# sections:
# - local: model_doc/graphormer
# title: Graphormer
# title: نماذج الرسم البياني
# title: النماذج
# - sections:
# - local: internal/modeling_utils
# title: الطبقات المخصصة والمرافق
# - local: internal/pipelines_utils
# title: مرافق خطوط الأنابيب
# - local: internal/tokenization_utils
# title: مرافق مقسم النصوص
# - local: internal/trainer_utils
# title: مرافق المدرب
# - local: internal/generation_utils
# title: مرافق التوليد
# - local: internal/image_processing_utils
# title: مرافق معالجة الصور
# - local: internal/audio_utils
# title: مرافق معالجة الصوت
# - local: internal/file_utils
# title: مرافق عامة
# - local: internal/time_series_utils
# title: مرافق السلاسل الزمنية
# title: مساعدون داخليون
# title: API

View File

@ -1,120 +0,0 @@
# Distributed training with 🤗 Accelerate
As models get larger, parallelism has emerged as a strategy for training bigger models on limited hardware and for speeding up training considerably. At Hugging Face, we created the [🤗 Accelerate](https://huggingface.co/docs/accelerate) library to help users easily train any 🤗 Transformers model on any kind of distributed setup, whether it is multiple GPUs on one machine or multiple GPUs across several machines. In this guide, learn how to customize your native PyTorch training loop to enable training in a distributed environment.
## Setup
Get started by installing 🤗 Accelerate:
```bash
pip install accelerate
```
Then import and create an [`~accelerate.Accelerator`] object. The [`~accelerate.Accelerator`] will automatically detect your type of distributed setup and initialize all the necessary components for training. You don't need to explicitly place your model on a device.
```py
>>> from accelerate import Accelerator
>>> accelerator = Accelerator()
```
## Prepare to accelerate
The next step is to pass all the relevant training objects to the [`~accelerate.Accelerator.prepare`] method. This includes your training and evaluation DataLoaders, a model, and an optimizer:
```py
>>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
... train_dataloader, eval_dataloader, model, optimizer
... )
```
## Backward
The last addition is to replace the typical `loss.backward()` in your training loop with 🤗 Accelerate's [`~accelerate.Accelerator.backward`] method:
```py
>>> for epoch in range(num_epochs):
... for batch in train_dataloader:
... outputs = model(**batch)
... loss = outputs.loss
... accelerator.backward(loss)
... optimizer.step()
... lr_scheduler.step()
... optimizer.zero_grad()
... progress_bar.update(1)
```
As you can see in the following code, you only need to add four additional lines of code to your training loop to enable distributed training!
```diff
+ from accelerate import Accelerator
from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler
+ accelerator = Accelerator()
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
optimizer = AdamW(model.parameters(), lr=3e-5)
- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
- model.to(device)
+ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
+ train_dataloader, eval_dataloader, model, optimizer
+ )
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
"linear",
optimizer=optimizer,
num_warmup_steps=0,
num_training_steps=num_training_steps
)
progress_bar = tqdm(range(num_training_steps))
model.train()
for epoch in range(num_epochs):
for batch in train_dataloader:
- batch = {k: v.to(device) for k, v in batch.items()}
outputs = model(**batch)
loss = outputs.loss
- loss.backward()
+ accelerator.backward(loss)
optimizer.step()
lr_scheduler.step()
optimizer.zero_grad()
progress_bar.update(1)
```
## Train
Once you've added the relevant lines of code, launch your training in a script or a notebook like Colaboratory.
### Train with a script
If you are running your training from a script, run the following command to create and save a configuration file:
```bash
accelerate config
```
Then launch your training with:
```bash
accelerate launch train.py
```
### Train with a notebook
🤗 Accelerate can also run in a notebook, for example if you're planning on using Colaboratory's TPUs. Wrap all the code responsible for training in a function, and pass it to [`~accelerate.notebook_launcher`]:
```py
>>> from accelerate import notebook_launcher
>>> notebook_launcher(training_function)
```
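Here, `training_function` is whatever function wraps your full training code. A minimal sketch, assuming a hypothetical `get_model_optimizer_and_dataloader()` helper that builds the objects from the previous sections (this helper is not part of the 🤗 Accelerate API):
```py
from accelerate import Accelerator, notebook_launcher

def training_function():
    # Build everything inside the function so that each spawned process creates its own copy
    accelerator = Accelerator()
    model, optimizer, train_dataloader = get_model_optimizer_and_dataloader()  # hypothetical helper
    model, optimizer, train_dataloader = accelerator.prepare(model, optimizer, train_dataloader)

    model.train()
    for batch in train_dataloader:
        outputs = model(**batch)
        accelerator.backward(outputs.loss)
        optimizer.step()
        optimizer.zero_grad()

notebook_launcher(training_function)
```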
For more information about 🤗 Accelerate and its rich features, refer to the [documentation](https://huggingface.co/docs/accelerate).

View File

@ -1,539 +0,0 @@
# Agents and tools
[[open-in-colab]]
### What is an agent?
Large Language Models (LLMs) trained to perform [causal language modeling](./tasks/language_modeling) can tackle a wide range of tasks, but they often struggle with basic tasks like logic, calculation, and search. When prompted in domains in which they do not perform well, they often fail to generate the answer we expect from them.
One approach to overcome this weakness is to create an *agent*.
An agent is a system that uses an LLM as its engine, and it has access to functions called *tools*.
These *tools* are functions for performing a task, and they contain all the description necessary for the agent to use them properly.
The agent can be programmed to:
- devise a series of actions/tools and run them all at once, like the [`CodeAgent`] for example
- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one, like the [`ReactJsonAgent`] for example
### Types of agents
#### Code agent
This agent follows defined steps: first, it plans the series of actions it wants to perform, then it generates Python code to execute all the actions at once. It natively handles different input and output types for its tools, so it is the recommended choice for multimodal tasks.
#### React agents
This is the go-to agent for solving reasoning tasks, since the ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) makes it really efficient to think on the basis of its previous observations.
We implement two versions of the ReactJsonAgent:
- [`ReactJsonAgent`] generates tool calls as JSON in its output.
- [`ReactCodeAgent`] is a new type of ReactJsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance.
> [!TIP]
> Read the [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about ReAct agents.
![Framework of a ReAct agent](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png)
For example, here is how a ReAct Code agent would work its way through the following question.
```py3
>>> agent.run(
... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
... )
=====New task=====
How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
====Agent is executing the code below:
bert_blocks = search(query="number of blocks in BERT base encoder")
print("BERT blocks:", bert_blocks)
====
Print outputs:
BERT blocks: twelve encoder blocks
====Agent is executing the code below:
attention_layer = search(query="number of layers in Attention is All You Need")
print("Attention layers:", attention_layer)
====
Print outputs:
Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
====Agent is executing the code below:
bert_blocks = 12
attention_layers = 6
diff = bert_blocks - attention_layers
print("Difference in blocks:", diff)
final_answer(diff)
====
Print outputs:
Difference in blocks: 6
Final answer: 6
```
### How can I build an agent?
To initialize an agent, you need these arguments:
- an LLM to power your agent - the agent is not exactly the LLM, it's more like a program that uses an LLM as its engine.
- a system prompt: these are the instructions given to the LLM engine to generate its output.
- a toolbox from which the agent picks tools to execute
- a parser to extract from the LLM output which tools are to be called and with which arguments
Upon initialization of the agent system, the tool attributes are used to generate a tool description, which is then baked into the agent's `system_prompt` to let it know which tools it can use and why.
To start with, please install the `agents` extras in order to install all the default dependencies.
```bash
pip install transformers[agents]
```
Build your LLM engine by defining a `llm_engine` method which accepts a list of [messages](./chat_templating) and returns text. This callable also needs to accept a `stop` argument that indicates when to stop generating.
```python
from huggingface_hub import login, InferenceClient
login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
def llm_engine(messages, stop_sequences=["Task"]) -> str:
response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
answer = response.choices[0].message.content
return answer
```
You could use any `llm_engine` method as long as:
1. it follows the [messages format](./chat_templating.md) (`List[Dict[str, str]]`) for its input and returns a `str`
2. it stops generating outputs at the sequences passed in the `stop` argument
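As a quick sanity check, you can call the engine defined above directly with a list of chat messages before wiring it into an agent (the message content below is only an illustration):
```python
# Exercise the llm_engine callable on its own; it should return a plain string
messages = [{"role": "user", "content": "Say 'ready' if you can read this."}]
print(llm_engine(messages, stop_sequences=["Task"]))
```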
You also need a `tools` argument which accepts a list of `Tools`. You can provide an empty list for `tools`, but use the default toolbox with the optional argument `add_base_tools=True`.
Now you can create an agent, like [`CodeAgent`], and run it. For convenience, we also provide the [`HfEngine`] class that uses `huggingface_hub.InferenceClient` under the hood.
```python
from transformers import CodeAgent, HfEngine
llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
agent.run(
"Could you translate this sentence from French, say it out loud and return the audio.",
sentence="Où est la boulangerie la plus proche?",
)
```
This feature will come in handy in a pinch! You can even leave the `llm_engine` argument undefined, and an [`HfEngine`] will be created by default.
```python
from transformers import CodeAgent
agent = CodeAgent(tools=[], add_base_tools=True)
agent.run(
"Could you translate this sentence from French, say it out loud and give me the audio.",
sentence="Où est la boulangerie la plus proche?",
)
```
Note that we used an additional `sentence` argument: you can pass text as additional arguments to the model.
You can also use this to indicate the path to local or remote files for the model to use:
```py
from transformers import ReactCodeAgent
agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
```
The system prompt and output parser were automatically defined, but you can easily inspect them by calling the `system_prompt_template` on your agent.
```python
print(agent.system_prompt_template)
```
It's important to explain as clearly as possible the task you want to perform.
Every [`~Agent.run`] operation is independent, and since an agent is powered by an LLM, minor variations in your prompt might yield completely different results.
You can also run an agent consecutively for different tasks: each time, the attributes `agent.task` and `agent.logs` will be re-initialized.
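For example, the same agent can be re-used for two unrelated tasks (the prompts below are only illustrations and rely solely on the default toolbox):
```python
# Each call starts fresh: agent.task and agent.logs are re-initialized on every run
agent.run("Translate 'Où est la boulangerie la plus proche?' from French to English.")
agent.run("Read out loud the sentence 'Agents are fun to build.' and return the audio.")
```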
#### Code execution
A Python interpreter executes the code on a set of inputs passed along with your tools.
This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and the print function, so you're already limited in what can be executed.
By default, the Python interpreter also doesn't allow calling functions outside of a safe list, so all the most obvious attacks shouldn't be an issue.
You can also authorize additional imports by passing the authorized modules as a list of strings in the argument `additional_authorized_imports` upon initialization of your [`ReactCodeAgent`] or [`CodeAgent`]:
```py
>>> from transformers import ReactCodeAgent
>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
(...)
'Hugging Face Blog'
```
The execution will stop at any code trying to perform an illegal operation, or if there is a regular Python error with the code generated by the agent.
> [!WARNING]
> The LLM can generate arbitrary code that will then be executed: do not call any unsafe functions!
### The system prompt
An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [`ReactCodeAgent`] (the version below is slightly simplified).
```text
You will be given a task to solve as best you can.
You have access to the following tools:
<<tool_descriptions>>
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence.
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
In the end you have to return a final answer using the `final_answer` tool.
Here are a few examples using notional tools:
---
{examples}
Above example were using notional tools that might not exist for you. You only have acces to those tools:
<<tool_names>>
You also can perform computations in the python code you generate.
Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```<end_code>' sequence. You MUST provide at least the 'Code:' sequence to move forward.
Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
Remember to make sure that variables you use are all defined.
Now Begin!
```
The system prompt includes:
- An *introduction* that explains how the agent should behave and which tools it should use.
- A description of all the tools, defined by a `<<tool_descriptions>>` token that is dynamically replaced at runtime with the tools defined or chosen by the user.
  - The tool description comes from the tool attributes, `name`, `description`, `inputs` and `output_type`, and a simple `jinja2` template that you can refine.
- The expected output format.
You could improve the system prompt, for example, by adding an explanation of the output format.
For maximum flexibility, you can overwrite the whole system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter.
```python
from transformers import ReactJsonAgent
from transformers.agents import PythonInterpreterTool
agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
```
> [!WARNING]
> Please make sure to define the `<<tool_descriptions>>` string somewhere in the `template` so the agent is aware of the available tools.
### Inspecting an agent run
Here are a few useful attributes to inspect what happened after a run:
- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that is then appended to `agent.logs`.
- Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages. This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task as separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what happened - but not every log will be transcribed by this method.
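A minimal sketch of how you might inspect these attributes after a run (the exact contents vary from run to run):
```python
# Fine-grained view: each step of the run is stored as a dictionary in agent.logs
for step_log in agent.logs:
    print(step_log)

# Higher-level view: the same run rebuilt as a list of chat messages
for message in agent.write_inner_memory_from_logs():
    print(message)
```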
## Tools
A tool is an atomic function to be used by an agent.
You can, for instance, check the [`PythonInterpreterTool`]: it has a name, a description, input descriptions, an output type, and a `__call__` method to perform the action.
When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why.
### Default toolbox
Transformers comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with the argument `add_base_tools=True`:
- **Document question answering**: given a document (such as a PDF) in image format, answer a question on this document ([Donut](./model_doc/donut))
- **Image question answering**: given an image, answer a question about the image ([VILT](./model_doc/vilt))
- **Speech to text**: transcribe speech to text ([Whisper](./model_doc/whisper))
- **Text to speech**: convert text to speech ([SpeechT5](./model_doc/speecht5))
- **Translation**: translate a given sentence from a source language to a target language.
- **Python code interpreter**: runs the LLM-generated Python code in a secure environment. This tool will only be added to [`ReactJsonAgent`] if you use `add_base_tools=True`, since code-based tools can already execute Python code
You can manually use a tool by calling the [`load_tool`] function and specifying a task to perform.
```python
from transformers import load_tool
tool = load_tool("text-to-speech")
audio = tool("This is a text to speech tool")
```
### Create a new tool
You can create your own tool for use cases not covered by the default tools from Hugging Face.
For example, let's create a tool that returns the most downloaded model for a given task from the Hub.
You'll start with the code below.
```python
from huggingface_hub import list_models
task = "text-classification"
model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
print(model.id)
```
This code can be converted into a class that inherits from the [`Tool`] superclass.
The custom tool needs:
- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`.
- An attribute `description`, which is used to populate the agent's system prompt.
- An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input.
- An `output_type` attribute, which specifies the output type.
- A `forward` method which contains the code to be executed to return the final result.
```python
from transformers import Tool
from huggingface_hub import list_models
class HFModelDownloadsTool(Tool):
name = "model_download_counter"
description = (
"This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
"It returns the name of the checkpoint."
)
inputs = {
"task": {
"type": "text",
"description": "the task category (such as text-classification, depth-estimation, etc)",
}
}
output_type = "text"
def forward(self, task: str):
model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
return model.id
```
Now that the custom `HfModelDownloadsTool` class is ready, you can save it to a file named `model_downloads.py` and import it for use.
```python
from model_downloads import HFModelDownloadsTool
tool = HFModelDownloadsTool()
```
You can also share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and that you're using a token with read access.
```python
tool.push_to_hub("{your_username}/hf-model-downloads")
```
Load the tool with the [`load_tool`] function and pass it to the `tools` parameter in your agent.
```python
from transformers import load_tool, CodeAgent
model_download_tool = load_tool("m-ric/hf-model-downloads")
agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
agent.run(
"Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
)
```
You'll get the following:
```text
======== New task ========
Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?
==== Agent is executing the code below:
most_downloaded_model = model_download_counter(task="text-to-video")
print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.")
====
```
And the output:
`"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning."`
### Manage your agent's toolbox
If you have already initialized an agent, it is inconvenient to reinitialize it from scratch just to add a new tool you want to use. With the Transformers library, you can manage an agent's toolbox by adding or replacing a tool.
Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox.
```python
from transformers import CodeAgent
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
agent.toolbox.add_tool(model_download_tool)
```
Now we can leverage both the new tool and the previous text-to-speech tool:
```python
agent.run(
"Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
)
```
| **Audio** |
|------------------------------------------------------------------------------------------------------------------------------------------------------|
| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
> [!WARNING]
> Beware when adding tools to an agent that already works well, because it can bias the tool selection towards your tool, or cause a tool other than the one already defined to be selected.
Use the `agent.toolbox.update_tool()` method to replace an existing tool in the agent's toolbox.
This is useful if your new tool is a one-to-one replacement of the existing tool, because the agent already knows how to perform that specific task.
Just make sure the new tool follows the same API as the replaced tool, or adapt the system prompt template to ensure all examples using the replaced tool are updated.
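A short sketch, assuming `better_model_download_tool` is a hypothetical [`Tool`] subclass you wrote with the same `name`, `inputs`, and `output_type` as the tool it replaces:
```python
# Swap the implementation while keeping the same tool name the agent already knows about
agent.toolbox.update_tool(better_model_download_tool)
```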
### Use a collection of tools
You can leverage tool collections by using the `ToolCollection` object, specifying the collection of tools you want to use.
Then pass them as a list to initialize your agent, and start using them!
```py
from transformers import ToolCollection, ReactCodeAgent
image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
agent.run("Please draw me a picture of rivers and lakes.")
```
To speed up the start, tools are loaded only if called by the agent.
This gets you this image:
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" />
### Use gradio-tools
[gradio-tools](https://github.com/freddyaboulton/gradio-tools) is a powerful library that allows using Hugging Face Spaces as tools. It supports many existing Spaces as well as custom Spaces.
Transformers supports `gradio_tools` with the [`Tool.from_gradio`] method. For example, let's use the [`StableDiffusionPromptGeneratorTool`](https://github.com/freddyaboulton/gradio-tools/blob/main/gradio_tools/tools/prompt_generator.py) from the `gradio-tools` toolkit to improve prompts and generate better images.
Import and instantiate the tool, then pass it to the `Tool.from_gradio` method:
```python
from gradio_tools import StableDiffusionPromptGeneratorTool
from transformers import Tool, load_tool, CodeAgent
gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
```
Now you can use it just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit`.
```python
image_generation_tool = load_tool('huggingface-tools/text-to-image')
agent = CodeAgent(tools=[prompt_generator_tool, image_generation_tool], llm_engine=llm_engine)
agent.run(
"Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit'
)
```
The model adequately leverages the tool:
```text
======== New task ========
Improve this prompt, then generate an image of it.
You have been provided with these initial arguments: {'prompt': 'A rabbit wearing a space suit'}.
==== Agent is executing the code below:
improved_prompt = StableDiffusionPromptGenerator(query=prompt)
while improved_prompt == "QUEUE_FULL":
improved_prompt = StableDiffusionPromptGenerator(query=prompt)
print(f"The improved prompt is {improved_prompt}.")
image = image_generator(prompt=improved_prompt)
====
```
Before finally generating the image:
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp" />
> [!WARNING]
> gradio-tools require *textual* inputs and outputs even when working with different modalities like image and audio objects. Image and audio inputs and outputs are currently incompatible.
### Use LangChain tools
We love LangChain and think it has a very compelling suite of tools.
To import a tool from LangChain, use the `from_langchain()` method.
Here is how you can use it to recreate the intro's search result with a LangChain web search tool.
```python
from langchain.agents import load_tools
from transformers import Tool, ReactCodeAgent
search_tool = Tool.from_langchain(load_tools(["serpapi"])[0])
agent = ReactCodeAgent(tools=[search_tool])
agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?")
```
## Gradio interface
You can leverage `gradio.Chatbot` to display your agent's thoughts using `stream_to_gradio`, here is an example:
```py
import gradio as gr
from transformers import (
load_tool,
ReactCodeAgent,
HfEngine,
stream_to_gradio,
)
# Import tool from Hub
image_generation_tool = load_tool("m-ric/text-to-image")
llm_engine = HfEngine("meta-llama/Meta-Llama-3-70B-Instruct")
# Initialize the agent with the image generation tool
agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
def interact_with_agent(task):
messages = []
messages.append(gr.ChatMessage(role="user", content=task))
yield messages
for msg in stream_to_gradio(agent, task):
messages.append(msg)
yield messages + [
gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
]
yield messages
with gr.Blocks() as demo:
text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
submit = gr.Button("Run illustrator agent!")
chatbot = gr.Chatbot(
label="Agent",
type="messages",
avatar_images=(
None,
"https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
),
)
submit.click(interact_with_agent, [text_input], [chatbot])
if __name__ == "__main__":
demo.launch()
```

Some files were not shown because too many files have changed in this diff.